1 PREPARATIONS

1.1 LOAD ESSENTIAL LIBRARIES

library(openxlsx)
library(tidyverse)
library(tidyr)
library(dplyr)
library(purrr)
library(magrittr)
library(broom)
library(stringi)
library(RColorBrewer)
library(table1)
library(gplots)
library(ggplot2)
library(ggdendro)
library(dendextend)
library(ggridges)
library(cowplot) 
library(ggpubr)
library(factoextra)
library(FactoMineR)
require(graphics)
library(corrplot)
library(PupillometryR)
library(fmsb)
library(ggsci)
library(psych)
library(viridis)
library(mgcv)
library(Hmisc)
library(MASS)
library(tis)
library(Boruta)
library(ROCR)
library(earth)
library(Lahman)
library(rstanarm)
#options(mc.cores = parallel::detectCores())

1.2 SETUP AND FUNCTIONS

col <- brewer.pal(n=11, name="RdBu")
render.cont <- function(x) { with(stats.default(x), c("", "Mean (SD)" = sprintf("%0.1f (%0.1f)", MEAN, SD), "Median (IQR)" = sprintf("%0.1f (%0.1f to %0.1f)", MEDIAN, Q1, Q3), "Range" = sprintf("%0.0f to %0.0f", MIN, MAX))) }
render.cat <- function(x) { c("", sapply(stats.default(x), function(y) with(y, sprintf("%d (%0.1f%%)", FREQ, PCTnoNA)))) }

cor.mtest <- function(mat, ...) {
  mat <- as.matrix(mat)
  n <- ncol(mat)
  p.mat<- matrix(NA, n, n)
  diag(p.mat) <- 0
  for (i in 1:(n - 1)) {
    for (j in (i + 1):n) {
      tmp <- cor.test(mat[, i], mat[, j], ...)
      p.mat[i, j] <- p.mat[j, i] <- tmp$p.value
    }
  }
  colnames(p.mat) <- rownames(p.mat) <- colnames(mat)
  p.mat
}

logit = function(x) log(x/(1-x))
inv.logit = function(x) exp(x)/(1+exp(x))
qsum = function(x) c(quantile(x,probs=c(0.5,0.025,0.975)))

# Throughout the manuscript, the significance level alpha chosen is 0.01
signifiance_stars <- list(cutpoints = c(0, 0.00001, 0.0001, 0.001, 0.01, Inf), symbols = c("****", "***", "**", "*", "ns"))


col1 <- c("#0073C299", "#EFC00099", "#86868699", "#CD534C99", # normal JCO
         "#7AA6DC99", "#003C6799", "#8F770099", "#3B3B3B99")
         
col2 <- c("#EFC00099", "#86868699") # when comparing infected versus non-infected

col3 <- c("#0073C299", "#86868699", "#CD534C99", # when non-infected/non-vaccinated are dropped
         "#7AA6DC99", "#003C6799", "#8F770099", "#3B3B3B99")
         
col4 <- c("#4169e1", "#FF00E0", "#FFC501") # for antibody isotypes

col5 <- c("#0073C299", "#86868699", "#7AA6DC99") # when comparing those with acute infection (influenza, inf/non-vac, inf/vac)

1.3 LOAD, PREPROCESS, AND PROCESS DATA

1.3.1 We read the data

data <- read.xlsx('..\\Dataset\\Prothrombin_follow_up_v02.xlsx', na.strings = c("NA", ""), sheet = 'Tabelle1')
aPL <- read.xlsx('..\\Dataset\\Prothrombin_follow_up_v02.xlsx', na.strings = c("NA", ""), sheet = 'All_aPL')
CoV2 <- read.xlsx('..\\Dataset\\Prothrombin_follow_up_v02.xlsx', na.strings = c("NA", ""), sheet = 'All_TRABI')
data_vertical <- read.xlsx('..\\Dataset\\Prothrombin_follow_up_v02.xlsx', na.strings = c("NA", ""), sheet = 'Sheet1')
data_vertical_tp <- read.xlsx('..\\Dataset\\Prothrombin_follow_up_v02.xlsx', na.strings = c("NA", ""), sheet = 'Sheet2')
published_data <- read.xlsx('..\\Dataset\\Prothrombin_follow_up_v02.xlsx', na.strings = c("NA", ""), sheet = 'published_data')
published_data_comparison <- read.xlsx('..\\Dataset\\Prothrombin_follow_up_v02.xlsx', na.strings = c("NA", ""), sheet = 'published_comparison')
Cytokines <- read.xlsx('..\\Dataset\\Prothrombin_follow_up_v02.xlsx', na.strings = c("NA", ""), sheet = 'Sheet3')
Cytokines_classifications <- read.xlsx('..\\Dataset\\Prothrombin_follow_up_v02.xlsx', na.strings = c("NA", ""), sheet = 'Cyto_groups')
Cytokines_ref <- read.xlsx('..\\Dataset\\Prothrombin_follow_up_v02.xlsx', na.strings = c("NA", ""), sheet = 'Cytokines_ref')

1.3.2 We add the aPL and the CoV2 datasets

data = merge(x = data, y = aPL, by = "Unique_sample_ID_for_study", all = TRUE)
## Warning in merge.data.frame(x = data, y = aPL, by =
## "Unique_sample_ID_for_study", : column name 'Old_study_ID' is duplicated in the
## result
data = merge(x = data, y = CoV2, by = "Unique_sample_ID_for_study", all = TRUE)
## Warning in merge.data.frame(x = data, y = CoV2, by =
## "Unique_sample_ID_for_study", : column name 'Old_study_ID' is duplicated in the
## result

1.3.3 We clean up a bit by retaining only essential information for the subsequent steps

data <- data %>% 
  dplyr::select(Unique_sample_ID_for_study, Patient_study_ID, Unique_patient_ID,
         Sex, Age, Sample_type, Cohort_type, Timepoint, Vaccination_statusonly_CoV2,
         Thrombosis_group, 
         `COVID_vaccination_Group`, 
         `DPOCoV2_or_FLU`, `SeverityCoV2_or_Flu`, Acute_SARS_CoV_2_infection, Anticoagulation.at.event:Immunosuppressed_admission, 
         CL_IgG:PT_IgA, Spike_IgG:NC_IgA
         )

published_data_comparison = published_data_comparison %>%
  dplyr::mutate(aPL_signal=as.numeric(aPL_signal))
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion

1.3.4 We generate an extra dataframe that can be used for cohort characterisation/demography

data_demo <- data %>% # Here, we want to have only the first timepoint for all samples. N=31 have two samples.
  dplyr::filter(!(Timepoint==2)
  )

1.3.5 We preprocess the dataset containing already published data (a comparison dataset)

published_data_comparison_NA <- published_data_comparison %>%
  filter(!is.na(as.numeric(aPL_signal)))

published_data_comparison_NA$aPL_signal <- as.numeric(published_data_comparison_NA$aPL_signal)

1.3.6 We conduct a PCA on the CoV-2 IgG and IgA data

We have done so in our previous study (https://doi.org/10.1371/journal.ppat.1010118) as a compound metric to combine the measurements against the spike, the RBD, and the NC protein. As, here, we have IgG and IgA measurements, we have to do it for both of them, separately. We will thus end up with two compound scores, instead of six individual measurements, which can facilitate correlating data subsequently.

mat_data_select_IgG <- data.matrix(data[51:53])
rownames(mat_data_select_IgG) <- data[,1]                  

mat_data_select_IgA <- data.matrix(data[54:56])
rownames(mat_data_select_IgA) <- data[,1]                  

set.seed(42)
mat_data_select_IgG.pca <- prcomp(na.omit(mat_data_select_IgG), center = TRUE, scale = FALSE)

set.seed(42)
mat_data_select_IgA.pca <- prcomp(na.omit(mat_data_select_IgA), center = TRUE, scale = FALSE)

PCA_IgG_plasma <- as.data.frame(mat_data_select_IgG.pca$x)
PCA_IgA_plasma <- as.data.frame(mat_data_select_IgA.pca$x)

PCA_IgG_plasma <- tibble::rownames_to_column(PCA_IgG_plasma, 'Unique_sample_ID_for_study')
PCA_IgA_plasma <- tibble::rownames_to_column(PCA_IgA_plasma, 'Unique_sample_ID_for_study')

data <- data  %>%
  dplyr::left_join(dplyr::select(PCA_IgG_plasma, Unique_sample_ID_for_study, PC1), by = "Unique_sample_ID_for_study") %>%
  dplyr::rename('PC1_IgG'='PC1') %>%
  dplyr::left_join(dplyr::select(PCA_IgA_plasma, Unique_sample_ID_for_study, PC1), by = "Unique_sample_ID_for_study") %>%
  dplyr::rename('PC1_IgA'='PC1')

data$PC1_IgA <- data$PC1_IgA*(-1) #change directionality to align with IgG

data_PCA <- data %>%
  dplyr::select(Unique_sample_ID_for_study, PC1_IgG, PC1_IgA) #Update this dataset, too
data_vertical <- left_join(data_vertical, data_PCA, by = "Unique_sample_ID_for_study")

pairs.panels(data[c(51:58)], alpha=0.01, stars=TRUE, ci=TRUE, method = 'spearman', cor=TRUE, lm=TRUE, smooth = TRUE, scale = FALSE, ellipses = FALSE)

1.3.7 We calculate the mean value of IgG, IgM, and IgA aPL per sample (patient)

data$aPL_IgG_rowmean <- as.numeric(rowMeans(data[,21:30]))
data$aPL_IgM_rowmean <- as.numeric(rowMeans(data[,31:40]))
data$aPL_IgA_rowmean <- as.numeric(rowMeans(data[,41:50]))

1.3.8 Lastly, we add the cytokines datasets

data = merge(x = data, y = Cytokines, by = "Unique_sample_ID_for_study", all = TRUE)

cytokines_z <- data %>%
    dplyr::mutate(as.data.frame(scale(data[,62:78]))) %>% # Z score of cytokine values
    dplyr::select(Unique_sample_ID_for_study:Acute_SARS_CoV_2_infection,
                  GCSF:Inflammatory_index, -DPOCoV2_or_FLU) %>%
    dplyr::mutate(Acute_SARS_CoV_2_infection = ifelse(COVID_vaccination_Group == '00_Influenza_Non-infected/non-vaccinated',  TRUE, Acute_SARS_CoV_2_infection)) %>% #Careful, this dataset is strictly used for some cytokine-related analyses. We have changed the acute infection entries here. While, everywhere else, this refers to acute infection with SARS-CoV-2, we change it here to an acute infection with SARS-CoV-2 OR with influenza
  dplyr::rename(Acute_infection=Acute_SARS_CoV_2_infection) %>% #the semantic change is followed by the nominal change
    na.omit()

cytokines_acute <- data %>%
    dplyr::select(Unique_sample_ID_for_study:Acute_SARS_CoV_2_infection,
                  GCSF:Inflammatory_index, -DPOCoV2_or_FLU) %>%
    dplyr::mutate(Acute_SARS_CoV_2_infection = ifelse(COVID_vaccination_Group == '00_Influenza_Non-infected/non-vaccinated',  TRUE, Acute_SARS_CoV_2_infection)) %>% #Careful, this dataset is strictly used for some cytokine-related analyses. We have changed the acute infection entries here. While, everywhere else, this refers to acute infection with SARS-CoV-2, we change it here to an acute infection with SARS-CoV-2 OR with influenza
  dplyr::rename(Acute_infection=Acute_SARS_CoV_2_infection) %>% #the semantic change is followed by the nominal change
    na.omit()

cytokines_vertical <- left_join(data_vertical, Cytokines, by = "Unique_sample_ID_for_study")

cytokines_vertical_z <- cytokines_vertical %>%
  dplyr::mutate(as.data.frame(scale(cytokines_vertical[,21:38]))) # Z score of cytokine values

cytokines_vertical = cytokines_vertical %>%
  dplyr::select(Unique_sample_ID_for_study:Inflammatory_index) %>%
  tidyr::pivot_longer(GCSF:Inflammatory_index,
               names_to = c('Cytokine_signal'),
               values_to = c('Cytokine_value'))

cytokines_vertical_z = cytokines_vertical_z %>%
  dplyr::select(Unique_sample_ID_for_study:Inflammatory_index) %>%
  tidyr::pivot_longer(GCSF:Inflammatory_index,
               names_to = c('Cytokine_signal'),
               values_to = c('Cytokine_value'))

cytokines_acute_longer <- cytokines_acute %>%
  dplyr::select(Unique_sample_ID_for_study:Inflammatory_index) %>%
  tidyr::pivot_longer(GCSF:Inflammatory_index,
               names_to = c('Cytokine_signal'),
               values_to = c('Cytokine_value'))

cytokines_acute_longer_z <- cytokines_acute %>%
  dplyr::mutate(as.data.frame(scale(cytokines_acute[,14:30]))) %>% # Z score of cytokine values
  dplyr::select(Unique_sample_ID_for_study:Inflammatory_index) %>%
  tidyr::pivot_longer(GCSF:Inflammatory_index,
               names_to = c('Cytokine_signal'),
               values_to = c('Cytokine_value'))

We end up with our final dataset. It is important to take note that for few (<5) patients with two timepoints but only one cytokine measurement, the cytokine measurement was reflected in both samples, i.e. duplicated. We aimed to avoid as many NAs as possible as these interfere with subsequent analyses and models.

1.4 SOME INITIAL STATEMENTS

The code chunks and approaches rendered here have been inspired by many people, known and unknown. Amongst them are David Lamparter, Raphaël P. B. Jacquat, Julien Riou, Dominik Menges, and Tala Ballouz, with whom we have developed code in the past (https://doi.org/10.5281/zenodo.7454292). Sreedhar Saseendran Kumar, Shalini’s colleague at BEL and our dear friend, has been part of the predecessor of this study (published here: https://doi.org/10.1371/journal.ppat.1010118). Our beloved friend Sumana Srivatsa, formerly at ETH Zurich (Beerenwinkel group) and now residing in California, is always there for us with advice, help, and inspiration. We also thank enquirers and those knowledgeable enough to provide answers in the many online fora for sharing their investigations with all of us. This notebook is not to be considered a replacement of a manuscript. We develop code, ideas, and analyses along the way; we try to keep track and enable others to be part of our thought process. The order, arrangement, and interpretation of plots and analyses may (slightly) change when included in a scientific manuscript. If some interpretations do not entirely converge between the manuscript and the notebook, those in the manuscript are the ones that have been discussed, shaped, and validated. This notebook is not a publication but, for us, a convenient way to go back and refine, modify, mend or append some analyses, and we believe this is a good way for collaborators to independently validate our approach, and for readers and other scientists to understand what we have done and adapt material for their own needs. This manuscript is co-developed with a large kinetic study on immune responses in critically-ill patients, with cohorts from Zurich, Spain, and Andorra - and some of the approaches we conduct, and develop for our purpose, here, are directly used in the other study, too. If there are questions regarding the code, we are happy if you get in touch ().

2 INTRODUCTION AND MOST IMPORTANT QUESTIONS

2.1 Background

Following an infection with SARS-CoV-2, potentially leading to COVID-19, endothelial damage and the antiviral immune response elicit a cascade of events. Among the most notable symptoms reside inflammation with potential cytokine storms, complement system activation, NETosis, platelet activation, and COVID-19-associated coagulopathy. These disturbances have shown to cause deep-vein thrombosis, pulmonary embolism, myocardial injury and infarction, and stroke (https://doi.org/10.1038/s41569-021-00665-7). Recent reports have likened many of these phenotypes associated with SARS-CoV-2-infection with the antiphospholipid syndrome.

We have recently described (https://doi.org/10.1371/journal.ppat.1010118) that some antiphospholipid antibodies are enriched in individuals concomitant to and after an infection with SARS-CoV-2 versus non-infected controls. Specifically, we have seen

  1. an enrichment for AnV, B2GPI, and PT IgM but not IgG;
  2. an association between PT IgM and the strength of the anti-SARS-CoV-2 antibody response;
  3. and, to a limited extent, an association with DPO, severity, and sex.

Among the many potential disturbances in coagulatory function, which of them are responsible for the increased occurrence of thromboses is still unclear. We therefore, molecularly, characterise a heterogeneous cohort of individuals following infection with SARS-CoV-2 and/or vaccination with mRNA vaccines, aiming to study potential connections between antibodies against SARS-CoV-2 proteins, cytokine levels, and an extensive panel of antiphospholipid antibodies (aPL). While we potentially reveal causal relationships that interfere with coagulation and that may lead to coagulation, another option may be that some of the signatures could serve as biomarkers. However, increased aPL levels may also be merely an epiphenomenon.

Some literature:

With respect to statistical models and approaches, we typically refer to the following texts:

  • Gelman, Carlin, et al. Bayesian Data Analysis-Third Edition (2014)
  • Lambert, A Student’s Guide to Bayesian Statistics (2018)
  • McElreath, Statistical Rethinking (2020)
  • Zar, Biostatistical Analysis (2014)
  • Szklo, Nieto, Epidemiology (2019)

2.2 Aims and questions

In the current study, we aim to validate some of the previous results and aim to extend the work previously performed. The most important questions we aim to ask are:

  1. Can we reproduce the findings from our previous study?

  2. As a follow-up to our previous study, can we say anything about seroconversion of IgM to IgG or to IgA?

  3. If we compare the aPL levels in SARS-CoV-2 infected individuals to another disease, here influenza, does it look generic or specific to SARS-CoV-2?

  4. Is it infection with SARS-CoV-2 that elicits higher levels of aPL IgM, or equally so vaccination with mRNA vaccine?

  5. Are thrombotic events linked to the occurrence of aPL? Or to something else? We have not, previously, linked the finding to a clinical phenotype. Here, we may have the chance to identify such an association:

3 COHORT CHARACTERISATION

Here, we characterise the cohort based on the most important demographic and clinical features. We have to be a bit careful doing so as we included more than one timepoint in few samples (n=31, in total n=155 samples). Characteristics indicated here, therefore, refer to the first timepoint only, in case multiple timepoints are available. The characterisation performed here is a general one. For specific questions posed further downstream, we may have to indicate some of these features again, to justify certain comparisons and to adjust for sex and age, e.g.

  • How many individuals? n=124
  • How many samples? n=155
  • How many samples when accounting for the loss of samples due to NA values in the cytokine dataset? n=112
  • For how many individuals more than one timepoint? n=31

3.1 Cohort characterisation - table 1

table1(~ Age + Sex +
         as.numeric(DPOCoV2_or_FLU) +
         as.factor(Vaccination_statusonly_CoV2) + COVID_vaccination_Group +
         SeverityCoV2_or_Flu + Acute_SARS_CoV_2_infection + Anticoagulation.at.event + 
         Anticoagulation.chronic + Platelet.aggregation.inhibitor.at.event + 
         Platelet.aggregation.inhibitor.chronic + as.factor(Immunosuppressed_admission) + Thrombosis_group,
       render.continuous = render.cont, render.categorical = render.cat, 
       data = data_demo)
## Warning in eval(predvars, data, env): NAs introduced by coercion
Overall
(N=124)
Age
Mean (SD) 55.1 (18.8)
Median (IQR) 58.0 (39.0 to 69.0)
Range 19 to 89
Sex
F 55 (44.4%)
M 69 (55.6%)
as.numeric(DPOCoV2_or_FLU)
Mean (SD) 62.6 (68.1)
Median (IQR) 25.0 (7.0 to 117.5)
Range 0 to 282
Missing 61 (49.2%)
as.factor(Vaccination_statusonly_CoV2)
0 86 (69.4%)
1 2 (1.6%)
2 24 (19.4%)
3 12 (9.7%)
COVID_vaccination_Group
00_Influenza_Non-infected/non-vaccinated 8 (6.5%)
01_Non-infected/non-vaccinated 32 (25.8%)
02_Infected/non-vaccinated 46 (37.1%)
03_Non-infected/vaccinated 24 (19.4%)
04_Infected/vaccinated 14 (11.3%)
SeverityCoV2_or_Flu
0_No disease 64 (51.6%)
1_anosmia, fever, fatigue, or headache but did not require hospitalization 3 (2.4%)
2_hospitalization without requiring oxygen supplementation 6 (4.8%)
3_hospitalization requiring oxygen supplementation 12 (9.7%)
4_hospitalization with treatment in the intensive care unit (ICU), mostly including ventilation 39 (31.5%)
Acute_SARS_CoV_2_infection
Yes 54 (43.5%)
No 70 (56.5%)
Anticoagulation.at.event
Yes 37 (39.4%)
No 57 (60.6%)
Missing 30 (24.2%)
Anticoagulation.chronic
Yes 29 (30.9%)
No 65 (69.1%)
Missing 30 (24.2%)
Platelet.aggregation.inhibitor.at.event
Yes 25 (26.6%)
No 69 (73.4%)
Missing 30 (24.2%)
Platelet.aggregation.inhibitor.chronic
Yes 25 (26.6%)
No 69 (73.4%)
Missing 30 (24.2%)
as.factor(Immunosuppressed_admission)
0 50 (53.2%)
1 20 (21.3%)
2 24 (25.5%)
Missing 30 (24.2%)
Thrombosis_group
Yes 31 (25.0%)
No 93 (75.0%)

The table provides us with the most important measure to characterise the cohort. We can generate a second table where we, additionally, group according to infection with SARS-CoV-2/vaccination with mRNA vaccine.

3.2 Cohort characterisation - table 2

table1(~ Age + Sex +
         as.numeric(DPOCoV2_or_FLU) +
         as.factor(Vaccination_statusonly_CoV2) +
         SeverityCoV2_or_Flu + Acute_SARS_CoV_2_infection + Anticoagulation.at.event + 
         Anticoagulation.chronic + Platelet.aggregation.inhibitor.at.event + 
         Platelet.aggregation.inhibitor.chronic + as.factor(Immunosuppressed_admission) + Thrombosis_group
       | COVID_vaccination_Group,
       render.continuous = render.cont, render.categorical = render.cat, 
       data = data_demo)
## Warning in eval(predvars, data, env): NAs introduced by coercion
00_Influenza_Non-infected/non-vaccinated
(N=8)
01_Non-infected/non-vaccinated
(N=32)
02_Infected/non-vaccinated
(N=46)
03_Non-infected/vaccinated
(N=24)
04_Infected/vaccinated
(N=14)
Overall
(N=124)
Age
Mean (SD) 54.0 (12.1) 34.6 (13.4) 60.5 (13.1) 65.6 (15.0) 66.9 (19.1) 55.1 (18.8)
Median (IQR) 54.5 (49.2 to 60.5) 28.5 (25.8 to 39.2) 62.5 (53.2 to 68.8) 67.0 (56.8 to 77.5) 73.5 (49.5 to 83.8) 58.0 (39.0 to 69.0)
Range 31 to 69 19 to 65 19 to 83 27 to 85 35 to 89 19 to 89
Sex
F 4 (50.0%) 19 (59.4%) 16 (34.8%) 10 (41.7%) 6 (42.9%) 55 (44.4%)
M 4 (50.0%) 13 (40.6%) 30 (65.2%) 14 (58.3%) 8 (57.1%) 69 (55.6%)
as.numeric(DPOCoV2_or_FLU)
Mean (SD) 4.8 (3.5) NA (NA) 86.1 (69.3) NA (NA) 10.7 (6.4) 62.6 (68.1)
Median (IQR) 4.0 (2.0 to 6.2) NA (NA to NA) 72.0 (20.0 to 137.5) NA (NA to NA) 10.0 (7.0 to 14.5) 25.0 (7.0 to 117.5)
Range 1 to 10 NA to NA 1 to 282 NA to NA 0 to 22 0 to 282
Missing 0 (0%) 32 (100%) 2 (4.3%) 24 (100%) 3 (21.4%) 61 (49.2%)
as.factor(Vaccination_statusonly_CoV2)
0 8 (100.0%) 32 (100.0%) 46 (100.0%) 0 (0.0%) 0 (0.0%) 86 (69.4%)
1 0 (0.0%) 0 (0.0%) 0 (0.0%) 0 (0.0%) 2 (14.3%) 2 (1.6%)
2 0 (0.0%) 0 (0.0%) 0 (0.0%) 16 (66.7%) 8 (57.1%) 24 (19.4%)
3 0 (0.0%) 0 (0.0%) 0 (0.0%) 8 (33.3%) 4 (28.6%) 12 (9.7%)
SeverityCoV2_or_Flu
0_No disease 8 (100.0%) 32 (100.0%) 0 (0.0%) 24 (100.0%) 0 (0.0%) 64 (51.6%)
1_anosmia, fever, fatigue, or headache but did not require hospitalization 0 (0.0%) 0 (0.0%) 1 (2.2%) 0 (0.0%) 2 (14.3%) 3 (2.4%)
2_hospitalization without requiring oxygen supplementation 0 (0.0%) 0 (0.0%) 1 (2.2%) 0 (0.0%) 5 (35.7%) 6 (4.8%)
3_hospitalization requiring oxygen supplementation 0 (0.0%) 0 (0.0%) 5 (10.9%) 0 (0.0%) 7 (50.0%) 12 (9.7%)
4_hospitalization with treatment in the intensive care unit (ICU), mostly including ventilation 0 (0.0%) 0 (0.0%) 39 (84.8%) 0 (0.0%) 0 (0.0%) 39 (31.5%)
Acute_SARS_CoV_2_infection
Yes 0 (0.0%) 0 (0.0%) 44 (95.7%) 0 (0.0%) 10 (71.4%) 54 (43.5%)
No 8 (100.0%) 32 (100.0%) 2 (4.3%) 24 (100.0%) 4 (28.6%) 70 (56.5%)
Anticoagulation.at.event
Yes 8 (100.0%) 0 (0.0%) 21 (45.7%) 3 (12.5%) 5 (35.7%) 37 (39.4%)
No 0 (0.0%) 2 (100.0%) 25 (54.3%) 21 (87.5%) 9 (64.3%) 57 (60.6%)
Missing 0 (0%) 30 (93.8%) 0 (0%) 0 (0%) 0 (0%) 30 (24.2%)
Anticoagulation.chronic
Yes 1 (12.5%) 0 (0.0%) 20 (43.5%) 3 (12.5%) 5 (35.7%) 29 (30.9%)
No 7 (87.5%) 2 (100.0%) 26 (56.5%) 21 (87.5%) 9 (64.3%) 65 (69.1%)
Missing 0 (0%) 30 (93.8%) 0 (0%) 0 (0%) 0 (0%) 30 (24.2%)
Platelet.aggregation.inhibitor.at.event
Yes 1 (12.5%) 0 (0.0%) 13 (28.3%) 7 (29.2%) 4 (28.6%) 25 (26.6%)
No 7 (87.5%) 2 (100.0%) 33 (71.7%) 17 (70.8%) 10 (71.4%) 69 (73.4%)
Missing 0 (0%) 30 (93.8%) 0 (0%) 0 (0%) 0 (0%) 30 (24.2%)
Platelet.aggregation.inhibitor.chronic
Yes 1 (12.5%) 1 (50.0%) 13 (28.3%) 6 (25.0%) 4 (28.6%) 25 (26.6%)
No 7 (87.5%) 1 (50.0%) 33 (71.7%) 18 (75.0%) 10 (71.4%) 69 (73.4%)
Missing 0 (0%) 30 (93.8%) 0 (0%) 0 (0%) 0 (0%) 30 (24.2%)
as.factor(Immunosuppressed_admission)
0 4 (50.0%) 1 (50.0%) 25 (54.3%) 15 (62.5%) 5 (35.7%) 50 (53.2%)
1 2 (25.0%) 0 (0.0%) 10 (21.7%) 3 (12.5%) 5 (35.7%) 20 (21.3%)
2 2 (25.0%) 1 (50.0%) 11 (23.9%) 6 (25.0%) 4 (28.6%) 24 (25.5%)
Missing 0 (0%) 30 (93.8%) 0 (0%) 0 (0%) 0 (0%) 30 (24.2%)
Thrombosis_group
Yes 1 (12.5%) 0 (0.0%) 27 (58.7%) 2 (8.3%) 1 (7.1%) 31 (25.0%)
No 7 (87.5%) 32 (100.0%) 19 (41.3%) 22 (91.7%) 13 (92.9%) 93 (75.0%)

For reference, we build the same table where we show the characteristics for all samples, and not just the individuals. This is not for the manuscript but contains information we need to look up frequently…

3.3 Cohort characterisation - table 3

table1(~ Age + Sex +
         as.numeric(DPOCoV2_or_FLU) +
         as.factor(Vaccination_statusonly_CoV2) +
         SeverityCoV2_or_Flu + Acute_SARS_CoV_2_infection + Anticoagulation.at.event + 
         Anticoagulation.chronic + Platelet.aggregation.inhibitor.at.event + 
         Platelet.aggregation.inhibitor.chronic + as.factor(Immunosuppressed_admission) + Thrombosis_group
       | COVID_vaccination_Group,
       render.continuous = render.cont, render.categorical = render.cat, 
       data = data)
## Warning in eval(predvars, data, env): NAs introduced by coercion
00_Influenza_Non-infected/non-vaccinated
(N=16)
01_Non-infected/non-vaccinated
(N=32)
02_Infected/non-vaccinated
(N=69)
03_Non-infected/vaccinated
(N=24)
04_Infected/vaccinated
(N=14)
Overall
(N=155)
Age
Mean (SD) 54.0 (11.7) 34.6 (13.4) 60.4 (12.6) 65.6 (15.0) 66.9 (19.1) 55.8 (17.7)
Median (IQR) 54.5 (49.2 to 60.5) 28.5 (25.8 to 39.2) 62.0 (53.0 to 68.0) 67.0 (56.8 to 77.5) 73.5 (49.5 to 83.8) 58.0 (44.5 to 68.0)
Range 31 to 69 19 to 65 19 to 83 27 to 85 35 to 89 19 to 89
Sex
F 8 (50.0%) 19 (59.4%) 23 (33.3%) 10 (41.7%) 6 (42.9%) 66 (42.6%)
M 8 (50.0%) 13 (40.6%) 46 (66.7%) 14 (58.3%) 8 (57.1%) 89 (57.4%)
as.numeric(DPOCoV2_or_FLU)
Mean (SD) 12.2 (14.5) NA (NA) 89.7 (65.3) NA (NA) 10.7 (6.4) 67.3 (65.8)
Median (IQR) 6.0 (4.0 to 12.8) NA (NA to NA) 78.0 (35.5 to 139.5) NA (NA to NA) 10.0 (7.0 to 14.5) 54.0 (9.2 to 121.5)
Range 1 to 56 NA to NA 1 to 282 NA to NA 0 to 22 0 to 282
Missing 0 (0%) 32 (100%) 2 (2.9%) 24 (100%) 3 (21.4%) 61 (39.4%)
as.factor(Vaccination_statusonly_CoV2)
0 16 (100.0%) 32 (100.0%) 69 (100.0%) 0 (0.0%) 0 (0.0%) 117 (75.5%)
1 0 (0.0%) 0 (0.0%) 0 (0.0%) 0 (0.0%) 2 (14.3%) 2 (1.3%)
2 0 (0.0%) 0 (0.0%) 0 (0.0%) 16 (66.7%) 8 (57.1%) 24 (15.5%)
3 0 (0.0%) 0 (0.0%) 0 (0.0%) 8 (33.3%) 4 (28.6%) 12 (7.7%)
SeverityCoV2_or_Flu
0_No disease 16 (100.0%) 32 (100.0%) 0 (0.0%) 24 (100.0%) 0 (0.0%) 72 (46.5%)
1_anosmia, fever, fatigue, or headache but did not require hospitalization 0 (0.0%) 0 (0.0%) 1 (1.4%) 0 (0.0%) 2 (14.3%) 3 (1.9%)
2_hospitalization without requiring oxygen supplementation 0 (0.0%) 0 (0.0%) 1 (1.4%) 0 (0.0%) 5 (35.7%) 6 (3.9%)
3_hospitalization requiring oxygen supplementation 0 (0.0%) 0 (0.0%) 5 (7.2%) 0 (0.0%) 7 (50.0%) 12 (7.7%)
4_hospitalization with treatment in the intensive care unit (ICU), mostly including ventilation 0 (0.0%) 0 (0.0%) 62 (89.9%) 0 (0.0%) 0 (0.0%) 62 (40.0%)
Acute_SARS_CoV_2_infection
Yes 0 (0.0%) 0 (0.0%) 67 (97.1%) 0 (0.0%) 10 (71.4%) 77 (49.7%)
No 16 (100.0%) 32 (100.0%) 2 (2.9%) 24 (100.0%) 4 (28.6%) 78 (50.3%)
Anticoagulation.at.event
Yes 16 (100.0%) 0 (0.0%) 36 (52.2%) 3 (12.5%) 5 (35.7%) 60 (48.0%)
No 0 (0.0%) 2 (100.0%) 33 (47.8%) 21 (87.5%) 9 (64.3%) 65 (52.0%)
Missing 0 (0%) 30 (93.8%) 0 (0%) 0 (0%) 0 (0%) 30 (19.4%)
Anticoagulation.chronic
Yes 2 (12.5%) 0 (0.0%) 35 (50.7%) 3 (12.5%) 5 (35.7%) 45 (36.0%)
No 14 (87.5%) 2 (100.0%) 34 (49.3%) 21 (87.5%) 9 (64.3%) 80 (64.0%)
Missing 0 (0%) 30 (93.8%) 0 (0%) 0 (0%) 0 (0%) 30 (19.4%)
Platelet.aggregation.inhibitor.at.event
Yes 2 (12.5%) 0 (0.0%) 21 (30.4%) 7 (29.2%) 4 (28.6%) 34 (27.2%)
No 14 (87.5%) 2 (100.0%) 48 (69.6%) 17 (70.8%) 10 (71.4%) 91 (72.8%)
Missing 0 (0%) 30 (93.8%) 0 (0%) 0 (0%) 0 (0%) 30 (19.4%)
Platelet.aggregation.inhibitor.chronic
Yes 2 (12.5%) 1 (50.0%) 21 (30.4%) 6 (25.0%) 4 (28.6%) 34 (27.2%)
No 14 (87.5%) 1 (50.0%) 48 (69.6%) 18 (75.0%) 10 (71.4%) 91 (72.8%)
Missing 0 (0%) 30 (93.8%) 0 (0%) 0 (0%) 0 (0%) 30 (19.4%)
as.factor(Immunosuppressed_admission)
0 8 (50.0%) 1 (50.0%) 33 (47.8%) 15 (62.5%) 5 (35.7%) 62 (49.6%)
1 4 (25.0%) 0 (0.0%) 17 (24.6%) 3 (12.5%) 5 (35.7%) 29 (23.2%)
2 4 (25.0%) 1 (50.0%) 19 (27.5%) 6 (25.0%) 4 (28.6%) 34 (27.2%)
Missing 0 (0%) 30 (93.8%) 0 (0%) 0 (0%) 0 (0%) 30 (19.4%)
Thrombosis_group
Yes 1 (6.2%) 0 (0.0%) 42 (60.9%) 2 (8.3%) 1 (7.1%) 46 (29.7%)
No 15 (93.8%) 32 (100.0%) 27 (39.1%) 22 (91.7%) 13 (92.9%) 109 (70.3%)

3.4 Cohort characterisation - table 4

table1(~ CL_IgG + PA_IgG + PC_IgG + PE_IgG + PG_IgG + PI_IgG + PS_IgG + AnnV_IgG + ß2GPI_IgG + PT_IgG + CL_IgM + PA_IgM + PC_IgM + PE_IgM + PG_IgM + PI_IgM + PS_IgM + AnnV_IgM + ß2GPI_IgM + PT_IgM + CL_IgA + PA_IgA + PC_IgA + PE_IgA + PG_IgA + PI_IgA + PS_IgA + AnnV_IgA + ß2GPI_IgA + PT_IgA | COVID_vaccination_Group,
       render.continuous = render.cont, render.categorical = render.cat, 
       data = data)
00_Influenza_Non-infected/non-vaccinated
(N=16)
01_Non-infected/non-vaccinated
(N=32)
02_Infected/non-vaccinated
(N=69)
03_Non-infected/vaccinated
(N=24)
04_Infected/vaccinated
(N=14)
Overall
(N=155)
CL_IgG
Mean (SD) 2.8 (6.3) 2.4 (6.9) 10.5 (10.8) 10.6 (15.2) 3.0 (6.2) 7.4 (10.8)
Median (IQR) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 12.0 (0.0 to 17.0) 0.0 (0.0 to 17.5) 0.0 (0.0 to 0.0) 0.0 (0.0 to 15.0)
Range 0 to 19 0 to 24 0 to 39 0 to 53 0 to 19 0 to 53
PA_IgG
Mean (SD) 0.0 (0.0) 0.0 (0.0) 4.1 (7.2) 6.5 (10.6) 2.3 (5.9) 3.0 (6.9)
Median (IQR) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 9.0) 0.0 (0.0 to 11.2) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0)
Range 0 to 0 0 to 0 0 to 23 0 to 36 0 to 19 0 to 36
PC_IgG
Mean (SD) 0.0 (0.0) 0.0 (0.0) 0.0 (0.0) 0.0 (0.0) 0.0 (0.0) 0.0 (0.0)
Median (IQR) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0)
Range 0 to 0 0 to 0 0 to 0 0 to 0 0 to 0 0 to 0
PE_IgG
Mean (SD) 0.0 (0.0) 0.0 (0.0) 0.0 (0.0) 0.9 (4.3) 0.0 (0.0) 0.1 (1.7)
Median (IQR) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0)
Range 0 to 0 0 to 0 0 to 0 0 to 21 0 to 0 0 to 21
PG_IgG
Mean (SD) 0.0 (0.0) 2.4 (5.1) 0.0 (0.0) 1.1 (5.5) 0.0 (0.0) 0.7 (3.3)
Median (IQR) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0)
Range 0 to 0 0 to 17 0 to 0 0 to 27 0 to 0 0 to 27
PI_IgG
Mean (SD) 0.0 (0.0) 0.4 (2.1) 0.9 (4.2) 1.2 (6.1) 0.0 (0.0) 0.7 (3.8)
Median (IQR) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0)
Range 0 to 0 0 to 12 0 to 21 0 to 30 0 to 0 0 to 30
PS_IgG
Mean (SD) 0.0 (0.0) 0.0 (0.0) 3.4 (8.0) 4.5 (8.7) 0.0 (0.0) 2.2 (6.6)
Median (IQR) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 2.2) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0)
Range 0 to 0 0 to 0 0 to 32 0 to 31 0 to 0 0 to 32
AnnV_IgG
Mean (SD) 0.0 (0.0) 2.2 (7.8) 2.3 (6.2) 4.5 (10.7) 3.8 (8.5) 2.5 (7.3)
Median (IQR) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0)
Range 0 to 0 0 to 38 0 to 29 0 to 36 0 to 29 0 to 38
ß2GPI_IgG
Mean (SD) 4.7 (7.6) 1.3 (4.2) 8.3 (14.2) 9.5 (13.9) 5.7 (8.2) 6.4 (11.9)
Median (IQR) 0.0 (0.0 to 10.5) 0.0 (0.0 to 0.0) 0.0 (0.0 to 13.0) 0.0 (0.0 to 15.0) 0.0 (0.0 to 12.0) 0.0 (0.0 to 12.0)
Range 0 to 22 0 to 19 0 to 89 0 to 41 0 to 20 0 to 89
PT_IgG
Mean (SD) 0.0 (0.0) 1.5 (4.1) 0.6 (3.2) 2.9 (11.5) 0.0 (0.0) 1.0 (5.3)
Median (IQR) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0)
Range 0 to 0 0 to 16 0 to 21 0 to 55 0 to 0 0 to 55
CL_IgM
Mean (SD) 9.0 (24.6) 1.7 (5.4) 12.8 (18.1) 9.5 (15.2) 3.8 (7.6) 8.8 (16.4)
Median (IQR) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 26.0) 0.0 (0.0 to 15.2) 0.0 (0.0 to 0.0) 0.0 (0.0 to 16.0)
Range 0 to 74 0 to 19 0 to 67 0 to 57 0 to 20 0 to 74
PA_IgM
Mean (SD) 20.5 (33.9) 0.9 (3.7) 13.3 (21.1) 8.2 (14.6) 4.3 (8.6) 9.9 (19.6)
Median (IQR) 0.0 (0.0 to 32.8) 0.0 (0.0 to 0.0) 0.0 (0.0 to 25.0) 0.0 (0.0 to 14.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 18.0)
Range 0 to 98 0 to 18 0 to 94 0 to 50 0 to 23 0 to 98
PC_IgM
Mean (SD) 0.0 (0.0) 0.5 (2.7) 0.0 (0.0) 0.0 (0.0) 0.0 (0.0) 0.1 (1.2)
Median (IQR) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0)
Range 0 to 0 0 to 15 0 to 0 0 to 0 0 to 0 0 to 15
PE_IgM
Mean (SD) 0.0 (0.0) 0.5 (2.8) 0.0 (0.0) 2.1 (10.4) 0.0 (0.0) 0.4 (4.3)
Median (IQR) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0)
Range 0 to 0 0 to 16 0 to 0 0 to 51 0 to 0 0 to 51
PG_IgM
Mean (SD) 0.0 (0.0) 0.0 (0.0) 0.7 (3.8) 2.1 (10.2) 0.0 (0.0) 0.6 (4.7)
Median (IQR) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0)
Range 0 to 0 0 to 0 0 to 25 0 to 50 0 to 0 0 to 50
PI_IgM
Mean (SD) 0.0 (0.0) 0.9 (5.1) 2.2 (9.8) 2.8 (10.7) 0.0 (0.0) 1.6 (8.1)
Median (IQR) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0)
Range 0 to 0 0 to 29 0 to 56 0 to 51 0 to 0 0 to 56
PS_IgM
Mean (SD) 7.6 (21.0) 1.2 (4.9) 11.2 (17.1) 9.1 (15.0) 4.4 (8.9) 7.8 (15.2)
Median (IQR) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 24.0) 0.0 (0.0 to 15.5) 0.0 (0.0 to 0.0) 0.0 (0.0 to 5.5)
Range 0 to 70 0 to 21 0 to 68 0 to 47 0 to 23 0 to 70
AnnV_IgM
Mean (SD) 11.2 (17.5) 12.0 (16.6) 36.2 (24.2) 14.8 (13.5) 21.7 (25.8) 24.0 (23.5)
Median (IQR) 0.0 (0.0 to 28.5) 5.5 (0.0 to 13.2) 37.0 (23.0 to 47.0) 18.5 (0.0 to 26.0) 18.5 (0.0 to 33.2) 24.0 (0.0 to 38.5)
Range 0 to 46 0 to 56 0 to 101 0 to 34 0 to 90 0 to 101
ß2GPI_IgM
Mean (SD) 23.9 (25.5) 15.3 (14.7) 36.6 (19.4) 27.1 (16.4) 21.6 (16.5) 28.1 (20.2)
Median (IQR) 28.5 (0.0 to 36.5) 17.5 (0.0 to 27.0) 35.0 (27.0 to 48.0) 30.0 (17.8 to 38.2) 24.5 (3.5 to 33.8) 29.0 (14.5 to 41.0)
Range 0 to 78 0 to 47 0 to 95 0 to 54 0 to 47 0 to 95
PT_IgM
Mean (SD) 6.7 (16.1) 7.1 (10.5) 19.5 (20.1) 8.0 (10.6) 7.1 (15.5) 12.7 (17.3)
Median (IQR) 0.0 (0.0 to 0.0) 0.0 (0.0 to 14.5) 22.0 (0.0 to 35.0) 0.0 (0.0 to 16.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 25.0)
Range 0 to 58 0 to 41 0 to 82 0 to 31 0 to 51 0 to 82
CL_IgA
Mean (SD) 12.0 (6.1) 14.0 (3.9) 16.7 (10.4) 28.3 (18.6) 9.0 (5.8) 17.0 (12.3)
Median (IQR) 10.5 (7.8 to 14.2) 13.0 (11.2 to 15.8) 14.0 (11.0 to 19.0) 22.5 (16.0 to 37.8) 9.0 (3.5 to 13.0) 14.0 (10.0 to 19.0)
Range 6 to 28 9 to 25 5 to 78 3 to 81 0 to 19 0 to 81
Missing 0 (0%) 14 (43.8%) 0 (0%) 0 (0%) 0 (0%) 14 (9.0%)
PA_IgA
Mean (SD) 7.7 (11.7) 12.3 (3.7) 5.1 (4.0) 13.2 (12.6) 2.6 (1.4) 7.4 (8.0)
Median (IQR) 3.0 (3.0 to 4.5) 12.0 (11.0 to 14.5) 5.0 (3.0 to 7.0) 9.5 (4.0 to 20.2) 3.0 (2.0 to 3.0) 5.0 (3.0 to 10.0)
Range 0 to 41 4 to 19 0 to 27 0 to 45 0 to 5 0 to 45
Missing 0 (0%) 14 (43.8%) 0 (0%) 0 (0%) 0 (0%) 14 (9.0%)
PC_IgA
Mean (SD) 4.4 (1.5) 0.4 (1.3) 4.5 (1.7) 5.9 (3.3) 3.6 (1.3) 4.1 (2.5)
Median (IQR) 4.0 (4.0 to 4.2) 0.0 (0.0 to 0.0) 4.0 (4.0 to 5.0) 5.0 (4.0 to 6.2) 3.5 (3.0 to 4.8) 4.0 (3.0 to 5.0)
Range 2 to 9 0 to 5 0 to 11 0 to 15 0 to 5 0 to 15
Missing 0 (0%) 14 (43.8%) 0 (0%) 0 (0%) 0 (0%) 14 (9.0%)
PE_IgA
Mean (SD) 1.7 (2.4) 0.1 (0.5) 2.0 (4.6) 1.8 (3.3) 1.4 (1.2) 1.6 (3.6)
Median (IQR) 1.0 (0.0 to 3.0) 0.0 (0.0 to 0.0) 1.0 (0.0 to 3.0) 0.0 (0.0 to 2.0) 2.0 (0.0 to 2.0) 0.0 (0.0 to 2.0)
Range 0 to 9 0 to 2 0 to 37 0 to 14 0 to 3 0 to 37
Missing 0 (0%) 14 (43.8%) 0 (0%) 0 (0%) 0 (0%) 14 (9.0%)
PG_IgA
Mean (SD) 3.2 (1.8) 0.1 (0.5) 5.5 (12.6) 7.8 (8.5) 1.4 (1.5) 4.5 (9.8)
Median (IQR) 3.0 (2.8 to 3.2) 0.0 (0.0 to 0.0) 3.0 (2.0 to 4.0) 4.5 (3.0 to 10.2) 1.0 (0.0 to 2.8) 3.0 (2.0 to 4.0)
Range 0 to 9 0 to 2 0 to 103 0 to 36 0 to 4 0 to 103
Missing 0 (0%) 14 (43.8%) 0 (0%) 0 (0%) 0 (0%) 14 (9.0%)
PI_IgA
Mean (SD) 3.5 (2.4) 1.4 (3.1) 4.8 (4.8) 11.2 (10.9) 2.3 (1.9) 5.1 (6.5)
Median (IQR) 3.0 (2.0 to 4.2) 0.0 (0.0 to 0.0) 4.0 (3.0 to 5.0) 5.5 (4.0 to 17.0) 2.0 (0.5 to 3.0) 3.0 (2.0 to 5.0)
Range 0 to 10 0 to 11 0 to 30 3 to 45 0 to 6 0 to 45
Missing 0 (0%) 14 (43.8%) 0 (0%) 0 (0%) 0 (0%) 14 (9.0%)
PS_IgA
Mean (SD) 5.4 (2.1) 12.7 (6.4) 8.4 (6.3) 19.8 (14.3) 5.1 (4.6) 10.2 (9.1)
Median (IQR) 5.0 (4.0 to 6.0) 12.5 (9.0 to 15.0) 7.0 (4.0 to 10.0) 15.5 (9.8 to 27.0) 3.0 (2.2 to 7.8) 8.0 (5.0 to 12.0)
Range 3 to 11 0 to 29 0 to 40 3 to 55 0 to 13 0 to 55
Missing 0 (0%) 14 (43.8%) 0 (0%) 0 (0%) 0 (0%) 14 (9.0%)
AnnV_IgA
Mean (SD) 6.5 (9.2) 7.5 (4.9) 9.1 (14.3) 9.6 (10.8) 3.4 (4.1) 8.1 (11.6)
Median (IQR) 3.5 (3.0 to 6.8) 9.0 (3.2 to 11.0) 5.0 (3.0 to 11.0) 6.0 (3.0 to 12.2) 2.5 (0.5 to 3.8) 5.0 (2.0 to 10.0)
Range 0 to 39 0 to 14 0 to 83 0 to 41 0 to 15 0 to 83
Missing 0 (0%) 14 (43.8%) 0 (0%) 0 (0%) 0 (0%) 14 (9.0%)
ß2GPI_IgA
Mean (SD) 24.2 (35.8) 15.2 (3.2) 15.3 (13.5) 26.5 (16.5) 9.8 (10.0) 17.7 (17.6)
Median (IQR) 11.5 (6.8 to 16.0) 15.5 (13.0 to 17.5) 12.0 (7.0 to 17.0) 22.5 (14.0 to 36.8) 8.5 (2.8 to 11.8) 13.0 (8.0 to 19.0)
Range 3 to 121 9 to 21 0 to 71 2 to 60 0 to 37 0 to 121
Missing 0 (0%) 14 (43.8%) 0 (0%) 0 (0%) 0 (0%) 14 (9.0%)
PT_IgA
Mean (SD) 4.3 (4.2) 6.9 (4.8) 4.2 (3.4) 7.8 (7.7) 2.6 (1.9) 5.0 (4.8)
Median (IQR) 3.0 (2.8 to 4.8) 9.0 (1.2 to 10.0) 4.0 (3.0 to 5.0) 6.0 (3.8 to 7.5) 2.0 (2.0 to 4.0) 4.0 (2.0 to 6.0)
Range 0 to 16 0 to 14 0 to 23 0 to 31 0 to 6 0 to 31
Missing 0 (0%) 14 (43.8%) 0 (0%) 0 (0%) 0 (0%) 14 (9.0%)

3.5 Cohort characterisation - table 5

table1(~ Spike_IgG + RBD_IgG + NC_IgG + Spike_IgA + RBD_IgA + NC_IgA
       | COVID_vaccination_Group,
       render.continuous = render.cont, render.categorical = render.cat, 
       data = data)
00_Influenza_Non-infected/non-vaccinated
(N=16)
01_Non-infected/non-vaccinated
(N=32)
02_Infected/non-vaccinated
(N=69)
03_Non-infected/vaccinated
(N=24)
04_Infected/vaccinated
(N=14)
Overall
(N=155)
Spike_IgG
Mean (SD) 1.4 (0.3) 1.4 (0.7) 3.1 (0.8) 2.7 (0.8) 3.5 (1.3) 2.8 (1.0)
Median (IQR) 1.4 (1.3 to 1.6) 1.4 (1.1 to 1.6) 3.3 (2.5 to 3.8) 2.6 (2.2 to 3.2) 3.9 (3.2 to 4.1) 2.9 (1.9 to 3.6)
Range 1 to 2 1 to 2 1 to 4 1 to 5 1 to 6 1 to 6
Missing 0 (0%) 30 (93.8%) 0 (0%) 0 (0%) 0 (0%) 30 (19.4%)
RBD_IgG
Mean (SD) 1.1 (0.2) 1.0 (0.6) 2.6 (1.0) 2.2 (0.9) 3.1 (1.4) 2.4 (1.1)
Median (IQR) 1.1 (1.0 to 1.3) 1.0 (0.8 to 1.2) 2.8 (2.2 to 3.2) 2.1 (1.6 to 2.8) 3.8 (2.7 to 4.1) 2.4 (1.5 to 3.0)
Range 1 to 1 1 to 1 0 to 6 1 to 4 0 to 4 0 to 6
Missing 0 (0%) 30 (93.8%) 0 (0%) 0 (0%) 0 (0%) 30 (19.4%)
NC_IgG
Mean (SD) 1.2 (0.3) 1.1 (1.1) 3.5 (1.1) 1.5 (0.7) 2.6 (1.1) 2.7 (1.3)
Median (IQR) 1.3 (1.1 to 1.4) 1.1 (0.7 to 1.5) 3.4 (2.7 to 4.0) 1.4 (1.0 to 1.7) 3.0 (2.0 to 3.2) 2.8 (1.5 to 3.5)
Range 0 to 2 0 to 2 1 to 6 1 to 3 1 to 4 0 to 6
Missing 0 (0%) 30 (93.8%) 0 (0%) 0 (0%) 0 (0%) 30 (19.4%)
Spike_IgA
Mean (SD) 1.3 (0.6) 1.1 (0.4) 2.6 (0.9) 1.8 (0.8) 2.0 (0.7) 2.2 (1.0)
Median (IQR) 1.3 (1.0 to 1.8) 1.1 (1.0 to 1.3) 2.7 (2.2 to 3.2) 1.7 (1.2 to 2.2) 2.0 (1.4 to 2.5) 2.2 (1.5 to 2.9)
Range 0 to 2 1 to 1 0 to 6 0 to 3 1 to 3 0 to 6
Missing 0 (0%) 30 (93.8%) 0 (0%) 0 (0%) 0 (0%) 30 (19.4%)
RBD_IgA
Mean (SD) 0.1 (0.1) 1.1 (0.2) 2.4 (1.2) 1.3 (0.7) 1.4 (0.5) 1.8 (1.3)
Median (IQR) 0.0 (0.0 to 0.0) 1.1 (1.0 to 1.2) 2.3 (1.8 to 3.1) 1.2 (0.9 to 1.7) 1.3 (1.1 to 1.9) 1.7 (0.9 to 2.6)
Range 0 to 0 1 to 1 0 to 6 0 to 3 1 to 3 0 to 6
Missing 0 (0%) 30 (93.8%) 0 (0%) 0 (0%) 0 (0%) 30 (19.4%)
NC_IgA
Mean (SD) 0.9 (0.5) 1.5 (1.0) 1.7 (1.5) 1.0 (0.5) 1.6 (0.8) 1.4 (1.2)
Median (IQR) 1.0 (0.7 to 1.2) 1.5 (1.1 to 1.9) 1.5 (0.1 to 2.8) 1.0 (0.8 to 1.3) 1.3 (1.0 to 2.2) 1.1 (0.5 to 2.4)
Range 0 to 2 1 to 2 0 to 6 0 to 2 1 to 3 0 to 6
Missing 0 (0%) 30 (93.8%) 0 (0%) 0 (0%) 0 (0%) 30 (19.4%)

3.6 Cohort characterisation - table 6

table1(~ GCSF + GMCSF + IFNalpha + IFNgamma + IL1beta + IL4 + IL6 + IL8 + IL10 + IL17A + IP10 + MIP1alpha + MIP1beta + S100A8_A9 + SDF1alpha + TNFalpha + Inflammatory_index + Inflammatory_index_IFNgamma
       | COVID_vaccination_Group,
       render.continuous = render.cont, render.categorical = render.cat, 
       data = data)
00_Influenza_Non-infected/non-vaccinated
(N=16)
01_Non-infected/non-vaccinated
(N=32)
02_Infected/non-vaccinated
(N=69)
03_Non-infected/vaccinated
(N=24)
04_Infected/vaccinated
(N=14)
Overall
(N=155)
GCSF
Mean (SD) 17.4 (48.2) 2.7 (2.2) 11.7 (13.5) 15.7 (43.7) 6.0 (7.0) 12.5 (28.6)
Median (IQR) 0.0 (0.0 to 10.9) 2.7 (1.9 to 3.4) 7.4 (5.5 to 11.3) 4.2 (0.9 to 12.4) 3.4 (1.1 to 9.7) 6.5 (2.7 to 11.5)
Range 0 to 195 1 to 4 0 to 67 0 to 217 0 to 24 0 to 217
Missing 0 (0%) 30 (93.8%) 13 (18.8%) 0 (0%) 0 (0%) 43 (27.7%)
GMCSF
Mean (SD) 4.5 (12.8) 6.5 (9.3) 7.9 (11.0) 5.4 (14.1) 4.0 (11.1) 6.4 (11.9)
Median (IQR) 0.0 (0.0 to 0.0) 6.5 (3.3 to 9.8) 0.0 (0.0 to 13.5) 0.0 (0.0 to 0.6) 0.0 (0.0 to 0.0) 0.0 (0.0 to 10.6)
Range 0 to 44 0 to 13 0 to 41 0 to 54 0 to 39 0 to 54
Missing 0 (0%) 30 (93.8%) 13 (18.8%) 0 (0%) 0 (0%) 43 (27.7%)
IFNalpha
Mean (SD) 0.4 (0.9) 0.3 (0.4) 0.6 (2.4) 0.2 (0.3) 0.6 (1.2) 0.5 (1.8)
Median (IQR) 0.0 (0.0 to 0.1) 0.3 (0.1 to 0.4) 0.1 (0.0 to 0.2) 0.1 (0.0 to 0.2) 0.2 (0.1 to 0.3) 0.1 (0.0 to 0.2)
Range 0 to 3 0 to 1 0 to 18 0 to 1 0 to 4 0 to 18
Missing 0 (0%) 30 (93.8%) 13 (18.8%) 0 (0%) 0 (0%) 43 (27.7%)
IFNgamma
Mean (SD) 0.0 (0.0) 0.0 (0.0) 0.3 (1.2) 0.0 (0.0) 0.0 (0.0) 0.1 (0.9)
Median (IQR) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0)
Range 0 to 0 0 to 0 0 to 7 0 to 0 0 to 0 0 to 7
Missing 0 (0%) 30 (93.8%) 13 (18.8%) 0 (0%) 0 (0%) 43 (27.7%)
IL1beta
Mean (SD) 1.3 (2.2) 0.5 (0.5) 1.7 (3.3) 2.1 (2.5) 1.0 (0.8) 1.6 (2.8)
Median (IQR) 0.9 (0.2 to 1.3) 0.5 (0.3 to 0.7) 1.0 (0.4 to 1.5) 0.9 (0.5 to 3.0) 0.9 (0.4 to 1.3) 0.9 (0.4 to 1.5)
Range 0 to 9 0 to 1 0 to 23 0 to 10 0 to 3 0 to 23
Missing 0 (0%) 30 (93.8%) 13 (18.8%) 0 (0%) 0 (0%) 43 (27.7%)
IL4
Mean (SD) 0.8 (2.6) 0.0 (0.0) 4.8 (9.9) 0.4 (1.7) 0.6 (1.6) 2.7 (7.4)
Median (IQR) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 2.7 (0.0 to 5.4) 0.0 (0.0 to 0.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 3.3)
Range 0 to 10 0 to 0 0 to 72 0 to 9 0 to 5 0 to 72
Missing 0 (0%) 30 (93.8%) 13 (18.8%) 0 (0%) 0 (0%) 43 (27.7%)
IL6
Mean (SD) 59.1 (139.5) 1.4 (1.9) 56.4 (129.4) 31.6 (84.0) 15.3 (25.4) 45.3 (112.9)
Median (IQR) 0.0 (0.0 to 22.1) 1.4 (0.7 to 2.0) 14.3 (4.8 to 40.7) 10.4 (0.0 to 23.8) 8.1 (1.7 to 16.3) 11.4 (2.7 to 28.4)
Range 0 to 435 0 to 3 0 to 854 0 to 412 0 to 98 0 to 854
Missing 0 (0%) 30 (93.8%) 13 (18.8%) 0 (0%) 0 (0%) 43 (27.7%)
IL8
Mean (SD) 14.7 (14.8) 10.6 (5.7) 25.3 (70.4) 30.2 (46.1) 14.1 (15.0) 23.2 (54.7)
Median (IQR) 7.2 (4.5 to 26.2) 10.6 (8.6 to 12.7) 9.8 (4.2 to 19.9) 12.3 (6.2 to 28.1) 8.7 (7.3 to 11.3) 9.6 (5.5 to 22.1)
Range 0 to 43 7 to 15 1 to 442 4 to 195 4 to 61 0 to 442
Missing 0 (0%) 30 (93.8%) 13 (18.8%) 0 (0%) 0 (0%) 43 (27.7%)
IL10
Mean (SD) 6.6 (14.2) 355.0 (498.7) 3.6 (5.8) 1.7 (1.8) 3.1 (3.0) 9.9 (66.9)
Median (IQR) 0.0 (0.0 to 3.7) 355.0 (178.7 to 531.3) 2.0 (1.0 to 4.0) 1.1 (0.6 to 2.3) 1.9 (1.6 to 3.8) 1.9 (0.7 to 3.7)
Range 0 to 48 2 to 708 0 to 41 0 to 8 1 to 12 0 to 708
Missing 0 (0%) 30 (93.8%) 13 (18.8%) 0 (0%) 0 (0%) 43 (27.7%)
IL17A
Mean (SD) 1.0 (1.8) 0.0 (0.0) 1.4 (1.6) 0.6 (1.3) 2.0 (2.1) 1.2 (1.7)
Median (IQR) 0.0 (0.0 to 1.7) 0.0 (0.0 to 0.0) 1.4 (0.0 to 2.2) 0.0 (0.0 to 0.2) 1.6 (0.0 to 2.9) 0.0 (0.0 to 2.0)
Range 0 to 6 0 to 0 0 to 7 0 to 5 0 to 6 0 to 7
Missing 0 (0%) 30 (93.8%) 13 (18.8%) 0 (0%) 0 (0%) 43 (27.7%)
IP10
Mean (SD) 84.2 (100.3) 34.7 (8.8) 70.2 (121.9) 48.2 (39.2) 97.9 (115.2) 70.3 (104.1)
Median (IQR) 30.7 (25.5 to 106.0) 34.7 (31.6 to 37.8) 28.1 (11.6 to 72.6) 34.9 (19.5 to 59.6) 65.7 (39.1 to 105.4) 36.8 (17.9 to 74.4)
Range 10 to 348 28 to 41 2 to 768 9 to 170 16 to 477 2 to 768
Missing 0 (0%) 30 (93.8%) 13 (18.8%) 0 (0%) 0 (0%) 43 (27.7%)
MIP1alpha
Mean (SD) 1.2 (1.5) 12.0 (9.6) 3.8 (4.3) 7.9 (8.7) 5.7 (4.2) 4.7 (5.8)
Median (IQR) 0.5 (0.2 to 1.3) 12.0 (8.6 to 15.4) 2.2 (1.2 to 5.0) 4.5 (2.8 to 8.8) 4.7 (2.5 to 7.8) 2.7 (1.3 to 5.5)
Range 0 to 5 5 to 19 0 to 19 1 to 38 1 to 17 0 to 38
Missing 0 (0%) 30 (93.8%) 13 (18.8%) 0 (0%) 0 (0%) 43 (27.7%)
MIP1beta
Mean (SD) 9.4 (14.0) 100.2 (0.7) 57.0 (48.9) 89.9 (54.4) 69.1 (31.8) 59.5 (50.5)
Median (IQR) 2.0 (0.0 to 12.6) 100.2 (99.9 to 100.4) 42.3 (22.5 to 74.7) 74.4 (49.0 to 111.9) 63.5 (52.0 to 71.2) 48.0 (22.5 to 80.2)
Range 0 to 50 100 to 101 2 to 237 30 to 255 39 to 168 0 to 255
Missing 0 (0%) 30 (93.8%) 13 (18.8%) 0 (0%) 0 (0%) 43 (27.7%)
S100A8_A9
Mean (SD) 2028.4 (2610.2) 16164.4 (9591.8) 26379.2 (17915.3) 30762.9 (15428.0) 39310.7 (19240.4) 25273.9 (19100.4)
Median (IQR) 1347.4 (584.8 to 1924.4) 16164.4 (12773.2 to 19555.6) 23940.5 (14712.5 to 35627.8) 27551.5 (24103.5 to 37492.2) 37070.4 (26730.5 to 53123.2) 24186.7 (12144.4 to 35627.8)
Range 175 to 10923 9382 to 22947 0 to 109070 10114 to 83363 11566 to 71985 0 to 109070
Missing 0 (0%) 30 (93.8%) 13 (18.8%) 0 (0%) 0 (0%) 43 (27.7%)
SDF1alpha
Mean (SD) 366.4 (217.0) 1394.2 (369.2) 1418.9 (681.8) 1677.6 (492.8) 1874.5 (622.7) 1380.5 (729.7)
Median (IQR) 324.6 (197.0 to 519.2) 1394.2 (1263.6 to 1524.7) 1325.3 (987.3 to 1917.5) 1592.3 (1396.4 to 2114.8) 1825.0 (1549.5 to 2166.9) 1372.3 (878.4 to 1881.6)
Range 87 to 821 1133 to 1655 68 to 3036 657 to 2756 1011 to 3491 68 to 3491
Missing 0 (0%) 30 (93.8%) 13 (18.8%) 0 (0%) 0 (0%) 43 (27.7%)
TNFalpha
Mean (SD) 1.1 (2.5) 0.5 (0.7) 3.3 (3.0) 1.3 (1.7) 1.6 (1.5) 2.3 (2.7)
Median (IQR) 0.0 (0.0 to 0.0) 0.5 (0.2 to 0.7) 2.7 (1.4 to 4.6) 0.7 (0.0 to 1.6) 1.2 (0.8 to 2.0) 1.5 (0.0 to 3.6)
Range 0 to 9 0 to 1 0 to 15 0 to 5 0 to 6 0 to 15
Missing 0 (0%) 30 (93.8%) 13 (18.8%) 0 (0%) 0 (0%) 43 (27.7%)
Inflammatory_index
Mean (SD) -5.1 (4.4) 8.7 (14.3) 0.1 (5.1) 0.7 (3.9) 1.3 (3.3) -0.2 (5.2)
Median (IQR) -7.2 (-7.7 to -5.5) 8.7 (3.6 to 13.7) -0.7 (-3.2 to 2.1) 0.2 (-1.3 to 1.2) 0.5 (-0.4 to 3.4) -0.5 (-3.5 to 1.8)
Range -8 to 7 -1 to 19 -7 to 23 -4 to 14 -4 to 9 -8 to 23
Missing 0 (0%) 30 (93.8%) 13 (18.8%) 0 (0%) 0 (0%) 43 (27.7%)
Inflammatory_index_IFNgamma
Mean (SD) -5.0 (4.4) 8.8 (14.3) 0.2 (4.9) 0.8 (3.9) 1.5 (3.3) -0.1 (5.1)
Median (IQR) -7.1 (-7.5 to -5.3) 8.8 (3.8 to 13.9) -0.6 (-3.1 to 2.3) 0.3 (-1.2 to 1.3) 0.6 (-0.2 to 3.6) -0.4 (-3.3 to 2.0)
Range -8 to 7 -1 to 19 -7 to 22 -4 to 14 -4 to 9 -8 to 22
Missing 0 (0%) 30 (93.8%) 13 (18.8%) 0 (0%) 0 (0%) 43 (27.7%)

3.7 Cohort characterisation - table 7

table1(~ aPL_signal
       | Isotype,
       render.continuous = render.cont, render.categorical = render.cat, 
       data = data_vertical_tp)
IgA
(N=1550)
IgG
(N=1550)
IgM
(N=1550)
Overall
(N=4650)
aPL_signal
Mean (SD) 8.1 (10.9) 2.4 (7.2) 9.4 (17.7) 6.6 (13.1)
Median (IQR) 5.0 (3.0 to 10.0) 0.0 (0.0 to 0.0) 0.0 (0.0 to 16.0) 0.0 (0.0 to 7.0)
Range 0 to 121 0 to 89 0 to 101 0 to 121
Missing 140 (9.0%) 0 (0%) 0 (0%) 140 (3.0%)

4 DESCRIPTIVE OVERVIEW AND DATA EXPLORATION

4.1 We look at the data with heatmaps, using informative groups

Data_heatmap <- data.frame(data[c(1, 11, 21:50, 51:56, 62:78)])
Data_heatmap <- Data_heatmap[order(Data_heatmap$COVID_vaccination_Group, Data_heatmap$Unique_sample_ID_for_study),]

Data_heatmap_aPL <- Data_heatmap %>% 
  dplyr::select(CL_IgG:PT_IgA)

Data_heatmap_aPL <- as.matrix(Data_heatmap_aPL)
rownames(Data_heatmap_aPL) <- Data_heatmap[,1]

Data_heatmap_CoV2 <- Data_heatmap %>% 
  dplyr::select(Spike_IgG:NC_IgA)

Data_heatmap_CoV2 <- as.matrix(Data_heatmap_CoV2)
rownames(Data_heatmap_CoV2) <- Data_heatmap[,1]

Data_heatmap_Cytokines <- Data_heatmap %>% 
  dplyr::select(GCSF:Inflammatory_index)

Data_heatmap_Cytokines <- as.matrix(Data_heatmap_Cytokines)
rownames(Data_heatmap_Cytokines) <- Data_heatmap[,1]

##### aPL

# Choose colours
my_paletteaPL <- colorRampPalette(brewer.pal(11, "Spectral"))(n = 99)

# scale for heatmap
colorsaPL = c(seq(0.0,100.0,length=100)) 

# Create the heatmap
heatmap.2(Data_heatmap_aPL,
          Rowv=FALSE,
          Colv="NA",
          margin=c(8,15),dendrogram=c("none"),
          RowSideColors=col1[as.factor(Data_heatmap$COVID_vaccination_Group)],
          col= my_paletteaPL,
          symm=F,symkey=F,symbreaks=T, scale="none",
          breaks=colorsaPL,
          sepwidth=c(0.01,0.01),
          sepcolor="white",
          colsep=1:ncol(Data_heatmap_aPL),
          labCol=NULL,
          cexRow=0.01, cexCol=0.75,
          na.color="grey",# Opposed to other molecular data, we have NA data here, so we mark them.
          key=TRUE,keysize=1.5,trace="none",density.info=c("none"))

##### CoV2

# Choose colours
my_paletteCoV2 <- colorRampPalette(brewer.pal(9, "Purples"))(n = 99)

# scale for heatmap
colorsCoV2 = c(seq(2.0,6.0,length=100)) 

# Create the heatmap


heatmap.2(Data_heatmap_CoV2,
          Rowv=FALSE,
          Colv="NA",
          margin=c(8,15),dendrogram=c("none"),
          RowSideColors=col1[as.factor(Data_heatmap$COVID_vaccination_Group)],
          col= my_paletteCoV2,
          symm=F,symkey=F,symbreaks=T, scale="none",
          breaks=colorsCoV2,
          sepwidth=c(0.01,0.01),
          sepcolor="white",
          colsep=1:ncol(Data_heatmap_CoV2),
          labCol=NULL,
          cexRow=0.01, cexCol=0.75,
          na.color="grey",# Opposed to other molecular data, we have NA data here, so we mark them.
          key=TRUE,keysize=1.5,trace="none",density.info=c("none"))

##### Cytokines

# Choose colours
my_paletteCytokines <- colorRampPalette(brewer.pal(9, "RdBu"))(n = 99)

# scale for heatmap
colorsCytokines = c(seq(-3,3,length=100)) 

# Create the heatmap

heatmap.2(Data_heatmap_Cytokines,
         Rowv=FALSE,
         Colv="NA",
         margin=c(8,15),dendrogram=c("none"),
         RowSideColors=col1[as.factor(Data_heatmap$COVID_vaccination_Group)],
         col= my_paletteCytokines,
         symm=F,symkey=F,symbreaks=T, scale="column", #As the different columns vary quite a bit, we scale the data here
         breaks=colorsCytokines, #Careful when setting the breaks when scale = 'column' is on...
         sepwidth=c(0.01,0.01),
         sepcolor="white",
         colsep=1:ncol(Data_heatmap_Cytokines),
         labCol=NULL,
         cexRow=0.01, cexCol=0.75,
         na.color="grey",# Opposed to other molecular data, we have NA data here, so we mark them.
         key=TRUE,keysize=1.5,trace="none",density.info=c("none"))
## Warning in heatmap.2(Data_heatmap_Cytokines, Rowv = FALSE, Colv = "NA", : Using
## scale="row" or scale="column" when breaks arespecified can produce unpredictable
## results.Please consider using only one or the other.

  • Blue: non-infected/non-vaccinated influenza patients
  • Yellow: non-infected/non-vaccinated control patients
  • Grey: infected/non-vaccinated
  • Scarlet: non-infected/vaccinated
  • Light blue: infected/vaccinated

4.2 We look at the data with ridge plots using informative groups

##### aPL

ggboxplot(
    data=subset(data_vertical, CoV2_type == 'Spike_IgG'), x = "aPL_type", y = "aPL_signal",
    color = "COVID_vaccination_Group", palette = "jco", outlier.shape = NA, add = 'mean'
    ) +
  geom_point(shape=16, position=position_jitterdodge(jitter.width = 0.1), aes(color = COVID_vaccination_Group)) +
  geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
  guides(x =  guide_axis(angle = 90)) +
  theme_classic()

subset(data_vertical,CoV2_type == 'Spike_IgG') %>%
  dplyr::arrange(COVID_vaccination_Group) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("CL_IgG"), "A.CL_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PA_IgG"), "B.PA_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PC_IgG"), "C.PC_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PE_IgG"), "D.PE_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PG_IgG"), "E.PG_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PI_IgG"), "F.PI_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PS_IgG"), "G.PS_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("AnnV_IgG"), "H.AnV_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("ß2GF.PI_IgG"), "I.ß2GPI_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PT_IgG"), "J.PT_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("CL_IgM"), "K.CL_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PA_IgM"), "L.PA_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PC_IgM"), "M.PC_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PE_IgM"), "N.PE_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PG_IgM"), "O.PG_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PI_IgM"), "P.PI_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PS_IgM"), "Q.PS_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("AnnV_IgM"), "R.AnV_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("ß2GP.PI_IgM"), "S.ß2GPI_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PT_IgM"), "T.PT_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("CL_IgA"), "U.CL_IgA")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PA_IgA"), "V.PA_IgA")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PC_IgA"), "W.PC_IgA")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PE_IgA"), "X.PE_IgA")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PG_IgA"), "Y.PG_IgA")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PI_IgA"), "Z.PI_IgA")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PS_IgA"), "ZA.PS_IgA")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("AnnV_IgA"), "ZB.AnV_IgA")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("ß2GZ.PI_IgA"), "ZC.ß2GPI_IgA")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PT_IgA"), "ZD.PT_IgA")) %>%
  ggplot(aes(x = as.numeric(aPL_signal) , y = aPL_type, fill=COVID_vaccination_Group)) +
    geom_density_ridges(
      aes(point_color = COVID_vaccination_Group, point_fill = COVID_vaccination_Group, scale = 0.5),
      alpha = .8, point_alpha = 1, jittered_points = TRUE) + #bandwith could be manually adjusted
    ggtitle('All aPL grouped') +
    xlab('aPL signal') +
    ylab('aPL types') +
    scale_fill_manual(values=col1) +
    scale_point_color_hue(l = 40) +
    ggplot2:::manual_scale("point_color", values = col1, guide = "none") +
    theme_ridges(grid = FALSE, center = TRUE) +
    geom_vline(xintercept=50, linetype="dashed", 
               color = "red", linewidth=1)

##### CoV-2

ggboxplot(
  data=subset(data_vertical, aPL_type == 'CL_IgG'), x = 'CoV2_type', y = "CoV2_signal",
  color = "COVID_vaccination_Group", palette = "jco", outlier.shape = NA
) +
  geom_point(shape=16, position=position_jitterdodge(jitter.width = 0.1), aes(color = COVID_vaccination_Group)) +
  geom_hline(yintercept=2, linetype="dashed", color = "red", size=1) +
  guides(x =  guide_axis(angle = 90)) +
  theme_classic()

subset(data_vertical, aPL_type == 'CL_IgG') %>%
  dplyr::arrange(COVID_vaccination_Group) %>%
  dplyr::mutate(CoV2_type = str_replace_all(CoV2_type, fixed("Spike_IgG"), "A.Spike_IgG")) %>%
  dplyr::mutate(CoV2_type = str_replace_all(CoV2_type, fixed("RBD_IgG"), "B.RBD_IgG")) %>%
  dplyr::mutate(CoV2_type = str_replace_all(CoV2_type, fixed("NC_IgG"), "C.NC_IgG")) %>%
  dplyr::mutate(CoV2_type = str_replace_all(CoV2_type, fixed("Spike_IgA"), "D.Spike_IgA")) %>%
  dplyr::mutate(CoV2_type = str_replace_all(CoV2_type, fixed("RBD_IgA"), "E.RBD_IgA")) %>%
  dplyr::mutate(CoV2_type = str_replace_all(CoV2_type, fixed("NC_IgA"), "F.NC_IgA")) %>%
  ggplot(aes(x = as.numeric(CoV2_signal) , y = CoV2_type, fill=COVID_vaccination_Group)) +
    geom_density_ridges(
      aes(point_color = COVID_vaccination_Group, point_fill = COVID_vaccination_Group, scale = 0.5),
      alpha = .8, point_alpha = 1, jittered_points = TRUE) + #bandwith could be manually adjusted
    ggtitle('All CoV2 grouped') +
    xlab('p(EC50)') +
    ylab('CoV2 isotypes') +
    scale_fill_manual(values=col3) +
    scale_point_color_hue(l = 40) +
    ggplot2:::manual_scale("point_color", values = col3, guide = "none") +
    theme_ridges(grid = FALSE, center = TRUE) +
    geom_vline(xintercept=2, linetype="dashed", 
               color = "red", linewidth=1)

##### Cytokines

subset(cytokines_vertical_z, #Careful, we also use the z-transformed data here, like in the heatmap
              aPL_type == 'CL_IgG' &
              CoV2_type == 'Spike_IgG') %>%
  dplyr::arrange(COVID_vaccination_Group) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("GCSF"), "A.GCSF")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("GMCSF"), "B.GMCSF")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IFNalpha"), "C.IFNalpha")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IFNgamma"), "D.IFNgamma")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IL1beta"), "E.IL1beta")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IL4"), "F.IL4")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IL6"), "G.IL6")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IL8"), "H.IL8")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IL10"), "I.IL10")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IL17A"), "J.IL17A")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IP10"), "K.IP10")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("MIP1alpha"), "L.MIP1alpha")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("MIP1beta"), "M.MIP1beta")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("S100A8_A9"), "N.S100A8_A9")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("SDF1alpha"), "O.SDF1alpha")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("TNFalpha"), "P.TNFalpha")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("Inflammatory_index"), "Q.Inflammatory_index")) %>%
  ggplot(aes(x = as.numeric(Cytokine_value) , y = Cytokine_signal, fill=COVID_vaccination_Group)) +
    geom_density_ridges(
      aes(point_color = COVID_vaccination_Group, point_fill = COVID_vaccination_Group, scale = 0.5),
      alpha = .8, point_alpha = 1, jittered_points = TRUE) + #bandwith could be manually adjusted
    ggtitle('All Cytokines grouped') +
    xlab('Cytokine signal (Z-Score)') +
    ylab('Cytokine types') +
    scale_fill_manual(values=col3) +
    scale_point_color_hue(l = 40) +
    ggplot2:::manual_scale("point_color", values = col3, guide = "none") +
    theme_ridges(grid = FALSE, center = TRUE)

Because of having only 0 values within a column, the following parameters will have to be excluded in a correlational analysis:

  • aPL PC_IgG
  • aPL PE_IgG
  • aPL PC_IgM

Thus, we included 8/10 for IgG, 9/10 for IgM, and 10/10 (all) for IgA in this particular plot.

4.3 We look at the data with PCA using informative groups

# aPL
aPL_PCA <- data %>%
  dplyr::select(Unique_sample_ID_for_study,
                COVID_vaccination_Group,
                CL_IgG:PT_IgA) %>%
  na.omit()

groups_PCA_aPL <- as.factor(aPL_PCA$COVID_vaccination_Group[1:141])
names_PCA_aPL <- as.factor(aPL_PCA$Unique_sample_ID_for_study[1:141])

aPL_PCA <- aPL_PCA %>%
  dplyr::select(-Unique_sample_ID_for_study,
                -COVID_vaccination_Group,
                CL_IgG:PT_IgA,
                -PC_IgG, -PE_IgG, -PC_IgM)

rownames(aPL_PCA) <- names_PCA_aPL    

set.seed(42)
aPL_PCA.pca <- prcomp(aPL_PCA, center = TRUE, scale = TRUE)

fviz_pca_biplot(aPL_PCA.pca, 
                label="var",
                geom.var = c("arrow", "text"),
                col.ind=groups_PCA_aPL,
                palette = ("jco"),
                habillage='none',
                addEllipses=FALSE, 
                ellipse.level=0.95,
                repel=TRUE) +
  theme_minimal()

# SARS-CoV-2 antibodies
COV2_PCA <- data %>%
  dplyr::select(Unique_sample_ID_for_study,
                COVID_vaccination_Group,
                Spike_IgG:NC_IgA) %>%
  na.omit()

groups_COV2_PCA <- as.factor(COV2_PCA$COVID_vaccination_Group[1:125])
names_COV2_PCA <- as.factor(COV2_PCA$Unique_sample_ID_for_study[1:125])

COV2_PCA <- COV2_PCA %>%
  dplyr::select(-Unique_sample_ID_for_study,
                -COVID_vaccination_Group,
                Spike_IgG:NC_IgA)

rownames(COV2_PCA) <- names_COV2_PCA   

set.seed(42)
COV2_PCA.pca <- prcomp(COV2_PCA, center = TRUE, scale = TRUE)

fviz_pca_biplot(COV2_PCA.pca, 
                label="var",
                geom.var = c("arrow", "text"),
                col.ind=groups_COV2_PCA,
                palette = ("jco"),
                habillage='none',
                addEllipses=FALSE, 
                ellipse.level=0.95,
                repel=TRUE) +
  theme_minimal()

# Cytokines
Cyto_PCA <- data %>%
  dplyr::select(Unique_sample_ID_for_study,
                COVID_vaccination_Group,
                GCSF:Inflammatory_index) %>%
  na.omit()

groups_PCA <- as.factor(Cyto_PCA$COVID_vaccination_Group[1:112])
names_PCA <- as.factor(Cyto_PCA$Unique_sample_ID_for_study[1:112])

Cyto_PCA <- Cyto_PCA %>%
  dplyr::select(-Unique_sample_ID_for_study,
                -COVID_vaccination_Group,
                GCSF:Inflammatory_index)

rownames(Cyto_PCA) <- names_PCA    

set.seed(42)
Cyto_PCA.pca <- prcomp(Cyto_PCA, center = TRUE, scale = TRUE)

fviz_pca_biplot(Cyto_PCA.pca, 
                label="var",
                geom.var = c("arrow", "text"),
                col.ind=groups_PCA,
                palette = ("jco"),
                habillage='none',
                addEllipses=FALSE, 
                ellipse.level=0.95,
                repel=TRUE) +
  theme_minimal()

4.4 We look at the data with PCA using informative groups, by combining all features

# We combine all of these features in PCA

All_molecular_PCA <- data %>%
  dplyr::select(Unique_sample_ID_for_study,
                COVID_vaccination_Group,
                GCSF:Inflammatory_index,
                CL_IgG:PT_IgA,
                Spike_IgG:NC_IgA
                ) %>%
  na.omit()

groups_PCA <- as.factor(All_molecular_PCA$COVID_vaccination_Group[1:112])
names_PCA <- as.factor(All_molecular_PCA$Unique_sample_ID_for_study[1:112])

All_molecular_PCA <- All_molecular_PCA %>%
  dplyr::select(-Unique_sample_ID_for_study,
                -COVID_vaccination_Group,
                GCSF:Inflammatory_index,
                CL_IgG:PT_IgA,
                -PC_IgG, -PE_IgG, -PC_IgM,
                Spike_IgG:NC_IgA)

rownames(All_molecular_PCA) <- names_PCA    

set.seed(42)
All_molecular_PCA.pca <- prcomp(All_molecular_PCA, center = TRUE, scale = TRUE)

fviz_pca_biplot(All_molecular_PCA.pca, 
                label="var",
                geom.var = c("arrow", 'text'),
                col.ind=groups_PCA,
                col.var='green',
                palette = ("jco"),
                habillage='none',
                addEllipses=FALSE, 
                ellipse.level=0.95,
                repel=TRUE) +
  theme_minimal()
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

# For an analysis we conduct later - we aim to find out whether in feature space, we can separate between those individuals with and those without thromboses...

All_molecular_PCA_thrombosis <- data %>%
  dplyr::select(Unique_sample_ID_for_study,
                Thrombosis_group,
                GCSF:Inflammatory_index,
                CL_IgG:PT_IgA,
                Spike_IgG:NC_IgA
                ) %>%
  na.omit()

groups_PCA <- as.factor(All_molecular_PCA_thrombosis$Thrombosis_group[1:112])
names_PCA <- as.factor(All_molecular_PCA_thrombosis$Unique_sample_ID_for_study[1:112])

All_molecular_PCA_thrombosis <- All_molecular_PCA_thrombosis %>%
  dplyr::select(-Unique_sample_ID_for_study,
                -Thrombosis_group,
                GCSF:Inflammatory_index,
                CL_IgG:PT_IgA,
                -PC_IgG, -PE_IgG, -PC_IgM,
                Spike_IgG:NC_IgA)

rownames(All_molecular_PCA_thrombosis) <- names_PCA    

set.seed(42)
All_molecular_PCA_thrombosis.pca <- prcomp(All_molecular_PCA_thrombosis, center = TRUE, scale = TRUE)

fviz_pca_biplot(All_molecular_PCA_thrombosis.pca, 
                label="var",
                geom.var = c("arrow", 'text'),
                col.ind=groups_PCA,
                col.var='green',
                palette = ("jco"),
                habillage='none',
                addEllipses=FALSE, 
                ellipse.level=0.95,
                repel=TRUE) +
  theme_minimal()
## Warning: ggrepel: 1 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

We observe that ‘infected’ patients cluster slightly apart from the rest. However, there is no distinction of those with thrombotic events according to molecular features. However, the second analysis is part of another chapter and not of the initial exploratory analysis…

4.5 Extra chapter: stratification of cytokines

As we have so much more cytokine data in the ICU study, we might perform this analysis more extensively in our next study/manuscript. However, using limited data, we can look into cytokine profiles aiming to distinguish between ICU patients with acute influenza and ICU patients with acute COVID-19.

# We perform a PCA on the cytokine data and group according to acute infection with influenza or acute infection with SARS-CoV-2

cytokine_PCA_group_acute <- cytokines_z %>%
  dplyr::filter(Acute_infection==TRUE) %>%
  dplyr::select(COVID_vaccination_Group)

cytokine_PCA_group_acute_names <- cytokines_z %>%
  dplyr::filter(Acute_infection==TRUE) %>%
  dplyr::select(Unique_sample_ID_for_study)

cytokine_PCA_group_acute_colours <- cytokines_z %>%
  dplyr::filter(Acute_infection==TRUE) %>%
  dplyr::select(COVID_vaccination_Group) %>% 
  dplyr::mutate(COVID_vaccination_Group = str_replace_all(COVID_vaccination_Group, fixed("00_Influenza_Non-infected/non-vaccinated"), "#0073C299")) %>% 
  dplyr::mutate(COVID_vaccination_Group = str_replace_all(COVID_vaccination_Group, fixed("02_Infected/non-vaccinated"), "#86868699")) %>%
  dplyr::mutate(COVID_vaccination_Group = str_replace_all(COVID_vaccination_Group, fixed("04_Infected/vaccinated"), "#7AA6DC99")) %>%
  as.matrix()

cytokine_PCA_CK_groups <- Cytokines_classifications %>%
  dplyr::select(Cyto_groups) %>%
  dplyr::mutate(Cyto_groups = str_replace_all(Cyto_groups, fixed("Antiinflammatory"), "#eaef55")) %>%
  dplyr::mutate(Cyto_groups = str_replace_all(Cyto_groups, fixed("Proinflammatory"), "#9dcd5e")) %>%
  dplyr::mutate(Cyto_groups = str_replace_all(Cyto_groups, fixed("Mf and T-cells recruitement"), "#61a864")) %>%
  dplyr::mutate(Cyto_groups = str_replace_all(Cyto_groups, fixed("Anti-Pro-inflammatory"), "#398060")) %>%
  dplyr::mutate(Cyto_groups = str_replace_all(Cyto_groups, fixed("Compound"), "#2a584f")) %>%
  as.matrix()

cytokine_PCA_Th_groups <- Cytokines_classifications %>%
  dplyr::select(Th_group) %>%
  dplyr::mutate(Th_group = str_replace_all(Th_group, fixed("Th1"), "#ef55e2")) %>%
  dplyr::mutate(Th_group = str_replace_all(Th_group, fixed("Th2"), "#c151cb")) %>%
    dplyr::mutate(Th_group = str_replace_all(Th_group, fixed("Th_17"), "#974bb2")) %>%
  dplyr::mutate(Th_group = str_replace_all(Th_group, fixed("1_and_2"), "#714295")) %>%
  dplyr::mutate(Th_group = str_replace_all(Th_group, fixed("1_and_17"), "#503777")) %>%
  dplyr::mutate(Th_group = str_replace_all(Th_group, fixed("Compound"), "#342a58")) %>%
  as.matrix()

cytokine_PCA_chemo_groups <- Cytokines_classifications %>%
  dplyr::select(Chemo_cluster) %>%
  dplyr::mutate(Chemo_cluster = str_replace_all(Chemo_cluster, fixed("Key_pro"), "#eba613")) %>%
  dplyr::mutate(Chemo_cluster = str_replace_all(Chemo_cluster, fixed("Key_anti"), "#5a9c42")) %>%
    dplyr::mutate(Chemo_cluster = str_replace_all(Chemo_cluster, fixed("Chemokine"), "#00766f")) %>%
  dplyr::mutate(Chemo_cluster = str_replace_all(Chemo_cluster, fixed("Rest"), "#2a4858")) %>%
  as.matrix()

cytokine_PCA_group_acute = as.factor(cytokine_PCA_group_acute$COVID_vaccination_Group[1:80])
cytokine_PCA_group_acute_names = as.factor(cytokine_PCA_group_acute_names$Unique_sample_ID_for_study[1:80])

set.seed(1)  
cytokines_z %>%
  dplyr::filter(Acute_infection==TRUE) %>%
  dplyr::select(GCSF:Inflammatory_index) %>%
  data.matrix() %>%
  prcomp(center = TRUE, scale = FALSE) %>%
  fviz_pca_biplot(label="var",
                geom.var = c("arrow", 'text'),
                col.ind=cytokine_PCA_group_acute,
                col.var='green',
                palette = (col5),
                habillage='none',
                addEllipses=FALSE, 
                ellipse.level=0.95,
                repel=TRUE) +
  theme_minimal()

# We perform an unsupervised hierarchical clustering on the cytokine data and group according to acute infection with influenza or acute infection with SARS-CoV-2

combined_bars <- cbind(cytokine_PCA_CK_groups, cytokine_PCA_Th_groups, cytokine_PCA_chemo_groups)

# We group according to cytokines
set.seed(1)
cytokines_z %>%
  dplyr::filter(Acute_infection==TRUE) %>%
  dplyr::select(GCSF:Inflammatory_index) %>%
  data.matrix() %>%
  t() %>% dist(method = "euclidean") %>% hclust(method='complete') %>% as.dendrogram -> dend_cyto

par(mar = c(10,2,1,1))
dend_cyto %>%
set("leaves_pch", 19)  %>%
set("nodes_cex", 0.7) %>%
plot()
colored_bars(colors = combined_bars, dend=dend_cyto)

#colored_bars(colors = cytokine_PCA_CK_groups, dend = dend_cyto)

# We group according to individuals
set.seed(1)  
cytokines_z %>%
  dplyr::filter(Acute_infection==TRUE) %>%
  dplyr::select(GCSF:Inflammatory_index) %>%
  data.matrix() %>%
  dist(method = "euclidean") %>% hclust(method='complete') %>% as.dendrogram -> dend_pat

par(mar = c(10,2,1,1))
dend_pat %>% 
set("leaves_pch", 19)  %>% 
set("nodes_cex", 0.7) %>% 
plot()
colored_bars(colors = cytokine_PCA_group_acute_colours, dend = dend_pat)

# We check for statistical differences between the groups, for individual cytokines

cytokines_acute_longer %>%
  dplyr::filter(Acute_infection==TRUE) %>%
  dplyr::arrange(COVID_vaccination_Group) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("GCSF"), "A.GCSF")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("GMCSF"), "B.GMCSF")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IFNalpha"), "C.IFNalpha")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IFNgamma"), "D.IFNgamma")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IL1beta"), "E.IL1beta")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IL4"), "F.IL4")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IL6"), "G.IL6")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IL8"), "H.IL8")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IL10"), "I.IL10")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IL17A"), "J.IL17A")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IP10"), "K.IP10")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("MIP1alpha"), "L.MIP1alpha")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("MIP1beta"), "M.MIP1beta")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("S100A8_A9"), "N.S100A8_A9")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("SDF1alpha"), "O.SDF1alpha")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("TNFalpha"), "P.TNFalpha")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("Inflammatory_index"), "Q.Inflammatory_index")) %>%
  ggboxplot(x = "COVID_vaccination_Group", y = "Cytokine_value",
    color = "COVID_vaccination_Group", palette=col5, outlier.shape = NA,
    merge = TRUE
  ) +
    geom_point(shape=16, position=position_jitterdodge(jitter.width = 0.1), aes(color = COVID_vaccination_Group)) +
    guides(x =  guide_axis(angle = 90)) +
    facet_wrap(~Cytokine_signal, scales='free_y', ncol=9) +
    theme(axis.text.y = element_text(size = 8)) +    
    labs(y="Plasma concentration (pg/ml)",x="",title="")

compare_means(Cytokine_value ~ COVID_vaccination_Group, 
              data=subset(cytokines_acute_longer, Acute_infection==TRUE),
              method = "wilcox", group.by='Cytokine_signal', symnum.args=signifiance_stars, p.adjust.method = "BH")
## # A tibble: 50 × 9
##    Cytokine_signal .y.        group1 group2       p p.adj p.for…¹ p.sig…² method
##    <chr>           <chr>      <chr>  <chr>    <dbl> <dbl> <chr>   <chr>   <chr> 
##  1 GCSF            Cytokine_… 02_In… 04_In… 0.0630  0.11  0.06299 ns      Wilco…
##  2 GCSF            Cytokine_… 02_In… 00_In… 0.0302  0.076 0.03025 ns      Wilco…
##  3 GCSF            Cytokine_… 04_In… 00_In… 0.188   0.26  0.18807 ns      Wilco…
##  4 GMCSF           Cytokine_… 02_In… 04_In… 0.0426  0.085 0.04259 ns      Wilco…
##  5 GMCSF           Cytokine_… 02_In… 00_In… 0.0393  0.082 0.03926 ns      Wilco…
##  6 GMCSF           Cytokine_… 04_In… 00_In… 0.812   0.86  0.81234 ns      Wilco…
##  7 IFNalpha        Cytokine_… 02_In… 04_In… 0.127   0.19  0.12738 ns      Wilco…
##  8 IFNalpha        Cytokine_… 02_In… 00_In… 0.0341  0.078 0.03415 ns      Wilco…
##  9 IFNalpha        Cytokine_… 04_In… 00_In… 0.00602 0.022 0.00602 *       Wilco…
## 10 IFNgamma        Cytokine_… 02_In… 04_In… 0.390   0.48  0.39014 ns      Wilco…
## # … with 40 more rows, and abbreviated variable names ¹​p.format, ²​p.signif
cytokines_acute_stats <- compare_means(Cytokine_value ~ COVID_vaccination_Group, 
                                       data=subset(cytokines_acute_longer, Acute_infection==TRUE),
                                       method = "wilcox", group.by='Cytokine_signal', 
                                       symnum.args=signifiance_stars, p.adjust.method = "BH") %>%
  as.data.frame() %>%
  dplyr::arrange(p.adj)

### We conducted the statistics and the visualisations on the original cytokine data, i.e. concentrations in pg/ml

# And now, we also include some data from other datasets.
#--https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3146842/ (only mean)
#--https://doi.org/10.1016/j.dib.2021.106857 (only mean)
#--https://doi.org/10.4269/ajtmh.20-1110 (only median) -- we do not use this dataset
#--https://www.science.org/doi/10.1126/sciadv.abe3024 (mean and median - we calculated ourselves)
#--Our own ICU cohort study in production (mean and median)
#--The current study (mean and median)

# We only show the cytokines we have been using.

Cytokines_ref %>%
    dplyr::filter(!Cytokine=='EGF') %>%
    dplyr::filter(!Cytokine=='Flt-3L') %>%
    dplyr::filter(!Cytokine=='IL-15') %>%
    dplyr::filter(!Cytokine=='IL-2') %>%
    dplyr::filter(!Cytokine=='MCP 1') %>%
    dplyr::filter(!Cytokine=='PDGF-AA') %>%
    dplyr::filter(!Cytokine=='sCD40L') %>%
    dplyr::filter(!Cytokine=='FGF-2') %>%
    dplyr::filter(!Cytokine=='TGF-α') %>%
    ggboxplot(x = "Main", y = "Mean",
    palette=c('#440154','#414487','#2a788e','#22a884','#7ad151','#fde725'), outlier.shape = NA,
    merge = TRUE
  ) +
    geom_point(shape=16, size=4, position=position_dodge(0.5), aes(color = Study)) +
    geom_errorbar(aes(ymin=(Mean-STDEV), ymax=(Mean+STDEV), colour=Study), width=0.5, position=position_dodge(0.5)) +
    guides(x =  guide_axis(angle = 90)) +
    facet_wrap(~Cytokine, scales='free_y', ncol=8) +
    scale_y_log10() + #As we do a log10 transform to be able to compare the data, some standard deviations that turn negative cannot be properly displayed.
    theme(axis.text.y = element_text(size = 8)) +    
    labs(y="Plasma concentration (pg/ml)",x="",title="")
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Warning in self$trans$transform(x): NaNs produced
## Warning: Transformation introduced infinite values in continuous y-axis
## Transformation introduced infinite values in continuous y-axis
## Warning: Removed 264 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 258 rows containing missing values (`geom_point()`).

# We can also show it without log10 axis

Cytokines_ref %>%
    dplyr::filter(!Cytokine=='EGF') %>%
    dplyr::filter(!Cytokine=='Flt-3L') %>%
    dplyr::filter(!Cytokine=='IL-15') %>%
    dplyr::filter(!Cytokine=='IL-2') %>%
    dplyr::filter(!Cytokine=='MCP 1') %>%
    dplyr::filter(!Cytokine=='PDGF-AA') %>%
    dplyr::filter(!Cytokine=='sCD40L') %>%
    dplyr::filter(!Cytokine=='FGF-2') %>%
    dplyr::filter(!Cytokine=='TGF-α') %>%
    ggboxplot(x = "Main", y = "Mean",
    palette=c('#440154','#414487','#2a788e','#22a884','#7ad151','#fde725'), outlier.shape = NA,
    merge = TRUE
  ) +
    geom_point(shape=16, size=4, position=position_dodge(0.5), aes(color = Study)) +
    geom_errorbar(aes(ymin=(Mean-STDEV), ymax=(Mean+STDEV), colour=Study), width=0.5, position=position_dodge(0.5)) +
    guides(x =  guide_axis(angle = 90)) +
    facet_wrap(~Cytokine, scales='free_y', ncol=8) +
    theme(axis.text.y = element_text(size = 8)) +    
    labs(y="Plasma concentration (pg/ml)",x="",title="")
## Warning: Removed 258 rows containing non-finite values (`stat_boxplot()`).
## Removed 258 rows containing missing values (`geom_point()`).

# How do cytokine levels modulate with regard to DPO? We only have DPO data for our dataset...

cytokines_vertical %>%
  dplyr::filter(Isotype == 'IgG' & Target == 'CL') %>%
  dplyr::select(Unique_sample_ID_for_study, Sex, Age, DPOCoV2_or_FLU, Cytokine_signal, Cytokine_value) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("GCSF"), "A.GCSF")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("GMCSF"), "B.GMCSF")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IFNalpha"), "C.IFNalpha")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IFNgamma"), "D.IFNgamma")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IL1beta"), "E.IL1beta")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IL4"), "F.IL4")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IL6"), "G.IL6")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IL8"), "H.IL8")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IL10"), "I.IL10")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IL17A"), "J.IL17A")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("IP10"), "K.IP10")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("MIP1alpha"), "L.MIP1alpha")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("MIP1beta"), "M.MIP1beta")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("S100A8_A9"), "N.S100A8_A9")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("SDF1alpha"), "O.SDF1alpha")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("TNFalpha"), "P.TNFalpha")) %>%
  dplyr::mutate(Cytokine_signal = str_replace_all(Cytokine_signal, fixed("Inflammatory_index"), "Q.Inflammatory_index")) %>%
  ggplot(aes(x=DPOCoV2_or_FLU, y=Cytokine_value)) +
  geom_point() +
  geom_smooth(method='lm') +
  stat_cor(method='spearman') +
  facet_wrap(~Cytokine_signal, scales='free_y', ncol=9) +
  theme(axis.text.y = element_text(size = 8)) +
  theme_classic() +   
  labs(x="Days post symptom onset",y="Plasma concentration (pg/ml)",title="")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 4488 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 4488 rows containing non-finite values (`stat_cor()`).
## Warning: Removed 4488 rows containing missing values (`geom_point()`).

A more comprehensive analysis with more data can be performed in the next study.

5 COMPARISON WITH PREVIOUS STUDY

Can we reproduce the findings from our previous study?

5.1 We use various approaches to compare the data from the previous study

5.1.1 We compare individual aPLs against each other

5.1.1.1 Restriction to infected/non-vaccinated and non-infected/non-vaccinated

We separate here into different aPL types. Additionally, we only show IgM and IgG as there are no comparable data for IgA. As an additional restriction, we here restrict the analysis to infected/non-vaccinated and non-infected/non-vaccinated, to have the proper comparisons. To do this properly, we quickly enumerate the basic characteristics of the two datasets.

# Age and Sex distributions, for individuals, not samples

Cohort_comparison_sexage <- 
  ggplot(subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                Isotype == 'IgM' &
                aPL_type == 'PT_IgM' &
                Timepoint == 1), aes(x = dataset, y = as.numeric(Age), color=dataset)) +
  geom_boxplot() +
  geom_point() +
  facet_wrap(~Sex,ncol=2) +
  theme(axis.text.y = element_text(size = 8)) +
  scale_fill_manual(values = c('#E64B35B2', '#4DBBD5B2')) +
  scale_color_manual(values = c('#E64B35B2', '#4DBBD5B2')) +
  labs(y="Age (years)",x="Dataset",title="Comparison of sex and age between datasets")

compare_means(Age ~ dataset, 
              data=subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                Isotype == 'IgM' &
                aPL_type == 'PT_IgM' &
                Timepoint == 1),
                method = "wilcox", group.by = 'Sex', symnum.args=signifiance_stars, p.adjust.method = "BH")
## # A tibble: 2 × 9
##   Sex   .y.   group1 group2      p p.adj p.format p.signif method  
##   <chr> <chr> <chr>  <chr>   <dbl> <dbl> <chr>    <chr>    <chr>   
## 1 M     Age   new    old    0.892   0.89 0.892    ns       Wilcoxon
## 2 F     Age   new    old    0.0511  0.1  0.051    ns       Wilcoxon
# DPO, for all samples (including repeats)

Cohort_comparison_DPO <-
ggplot(subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                Isotype == 'IgM' &
                aPL_type == 'PT_IgM'), aes(x = dataset, y = as.numeric(DPOCoV2_or_FLU), color=dataset)) +
  geom_boxplot() +
  geom_point() +
  theme(axis.text.y = element_text(size = 8)) +
  scale_fill_manual(values = c('#E64B35B2', '#4DBBD5B2')) +
  scale_color_manual(values = c('#E64B35B2', '#4DBBD5B2')) +
  labs(y="DPO (days)",x="Dataset",title="Comparison of DPO between datasets")

compare_means(DPOCoV2_or_FLU ~ dataset, 
              data=subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                Isotype == 'IgM' &
                aPL_type == 'PT_IgM'),
                method = "wilcox", symnum.args=signifiance_stars, p.adjust.method = "BH")
## # A tibble: 1 × 8
##   .y.            group1 group2            p       p.adj p.format p.signif method
##   <chr>          <chr>  <chr>         <dbl>       <dbl> <chr>    <chr>    <chr> 
## 1 DPOCoV2_or_FLU new    old    0.0000000185 0.000000018 1.8e-08  ****     Wilco…
# Disease severity, per individual, not samples

Cohort_comparison_severity <-
ggplot(subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                Isotype == 'IgM' &
                aPL_type == 'PT_IgM' &
                Timepoint == 1), aes(x = SeverityCoV2_or_Flu, color=dataset)) +
  geom_bar(position='dodge', aes(fill=dataset)) +
  theme(axis.text.y = element_text(size = 8)) +
  scale_fill_manual(values = c('#E64B35B2', '#4DBBD5B2')) +
  scale_color_manual(values = c('#E64B35B2', '#4DBBD5B2')) +
  labs(y="Number of individuals",x="Disease severity",title="Comparison of disease severity between datasets")

# Comparison of levels

Cohort_comparison_aPL <-
gghistogram(subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                (Isotype == 'IgM' |
                Isotype == 'IgG')),
  x = "aPL_signal", 
  y = "..density..",
  rug = FALSE,
  fill = "COVID_vaccination_Group", 
  palette = c(col2),
  ) +
  facet_grid(rows=vars(Isotype), cols= vars(dataset), scales='free_y', switch='y') +
  theme_classic()
## Warning: Using `bins = 30` by default. Pick better value with the argument
## `bins`.
Cohort_comparison_aPL

ggarrange(Cohort_comparison_sexage, Cohort_comparison_DPO, Cohort_comparison_severity, Cohort_comparison_aPL,
          labels = c("A", "B", 'C', 'D'),
          ncol = 2, nrow = 2)
## Warning: Removed 52 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 52 rows containing missing values (`geom_point()`).

The datasets are not fundamentally different in terms of age and sex. However, the range of DPO included is quite different and the disease severity in the new dataset is more drastically dichotomised into ‘no disease’ and ‘severe disease’, while in the old dataset, the different categories were quite evenly distributed. This indicates that the comparisons we are now going to perform underlie some potentially meaningful differences, which have to be considered. However, the distributions of IgM and IgG levels are largely comparable between the datasets (see statistical analyses).

subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                 (Isotype == 'IgM' |
                    Isotype == 'IgG')) %>%
  dplyr::arrange(COVID_vaccination_Group) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("CL_IgG"), "A.CL_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PA_IgG"), "B.PA_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PC_IgG"), "C.PC_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PE_IgG"), "D.PE_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PG_IgG"), "E.PG_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PI_IgG"), "F.PI_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PS_IgG"), "G.PS_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("AnnV_IgG"), "H.AnV_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("ß2GF.PI_IgG"), "I.ß2GPI_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PT_IgG"), "J.PT_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("CL_IgM"), "K.CL_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PA_IgM"), "L.PA_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PC_IgM"), "M.PC_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PE_IgM"), "N.PE_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PG_IgM"), "O.PG_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PI_IgM"), "P.PI_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PS_IgM"), "Q.PS_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("AnnV_IgM"), "R.AnV_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("ß2GP.PI_IgM"), "S.ß2GPI_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PT_IgM"), "T.PT_IgM")) %>%
  ggplot(aes(x = aPL_type, y = as.numeric(aPL_signal))) +
    geom_boxplot(aes(color = dataset), outlier.shape = NA, add = "mean") +
    geom_point(shape=16, position=position_jitterdodge(jitter.width = 0.1), aes(color = dataset)) +
    geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
    stat_summary(fun = mean, geom = "point", shape = 18, size = 3,
                 aes(group = interaction(aPL_type, dataset)), color = "darkred",
                 position = position_dodge(width = 0.8)) +
    stat_summary(fun = mean, colour = "red", 
                 position = position_dodge(width = 0.8),
                 geom = "text", vjust = -0.7, 
                 aes(label = round(..y.., digits = 1), group = interaction(aPL_type, dataset))) +
    scale_color_npg() +
    guides(x =  guide_axis(angle = 90)) +
    theme_classic()
## Warning in geom_boxplot(aes(color = dataset), outlier.shape = NA, add = "mean"):
## Ignoring unknown parameters: `add`
## Warning: The dot-dot notation (`..y..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(y)` instead.

subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                 (Isotype == 'IgM' |
                    Isotype == 'IgG')) %>%
  dplyr::arrange(COVID_vaccination_Group) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("CL_IgG"), "A.CL_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PA_IgG"), "B.PA_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PC_IgG"), "C.PC_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PE_IgG"), "D.PE_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PG_IgG"), "E.PG_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PI_IgG"), "F.PI_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PS_IgG"), "G.PS_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("AnnV_IgG"), "H.AnV_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("ß2GF.PI_IgG"), "I.ß2GPI_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PT_IgG"), "J.PT_IgG")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("CL_IgM"), "K.CL_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PA_IgM"), "L.PA_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PC_IgM"), "M.PC_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PE_IgM"), "N.PE_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PG_IgM"), "O.PG_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PI_IgM"), "P.PI_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PS_IgM"), "Q.PS_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("AnnV_IgM"), "R.AnV_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("ß2GP.PI_IgM"), "S.ß2GPI_IgM")) %>%
  dplyr::mutate(aPL_type = str_replace_all(aPL_type, fixed("PT_IgM"), "T.PT_IgM")) %>%
  ggplot(aes(x = as.numeric(aPL_signal) , y = aPL_type, fill=dataset)) +
    geom_density_ridges(
      aes(point_color = dataset, point_fill = dataset, scale = 0.5),
      alpha = .8, point_alpha = 1, jittered_points = TRUE) + #bandwith could be manually adjusted
    ggtitle('All aPL grouped by aPL types') +
    xlab('aPL signal') +
    ylab('aPL types') +
    scale_color_npg() +
    scale_point_color_hue(l = 40) +
    ggplot2:::manual_scale("point_color", values = c('#E64B35B2', '#4DBBD5B2'), guide = "none") +
    theme_ridges(grid = FALSE, center = TRUE) +
    geom_vline(xintercept=50, linetype="dashed", 
               color = "red", linewidth=1)
## Scale for point_colour is already present.
## Adding another scale for point_colour, which will replace the existing scale.
## Picking joint bandwidth of 3.86

Observations:

  • The pattern look quite similar between the previously published (old) and the new datasets.
  • We, here, look only at infected AND non-infected previously published (old) versus new dataset, split for all aPL.

5.1.1.2 Female:male ratio

# All samples, including repeats

table1(~ Age + Sex + COVID_vaccination_Group
       | dataset,
       render.continuous = render.cont, render.categorical = render.cat, 
       data = subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                Isotype == 'IgM' &
                aPL_type == 'PT_IgM'))
new
(N=101)
old
(N=95)
Overall
(N=196)
Age
Mean (SD) 52.2 (17.6) 54.4 (16.6) 53.3 (17.1)
Median (IQR) 56.0 (37.0 to 66.0) 54.0 (41.0 to 66.5) 54.0 (39.0 to 66.0)
Range 19 to 83 18 to 86 18 to 86
Sex
F 42 (41.6%) 40 (42.1%) 82 (41.8%)
M 59 (58.4%) 55 (57.9%) 114 (58.2%)
COVID_vaccination_Group
01_Non-infected/non-vaccinated 32 (31.7%) 20 (21.1%) 52 (26.5%)
02_Infected/non-vaccinated 69 (68.3%) 75 (78.9%) 144 (73.5%)
# Unique patients

table1(~ Age + Sex + COVID_vaccination_Group
       | dataset,
       render.continuous = render.cont, render.categorical = render.cat, 
       data = subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                Isotype == 'IgM' &
                aPL_type == 'PT_IgM' &
                Timepoint==1))
new
(N=78)
old
(N=90)
Overall
(N=168)
Age
Mean (SD) 49.9 (18.3) 54.0 (16.7) 52.1 (17.5)
Median (IQR) 53.5 (31.0 to 65.0) 54.0 (41.0 to 66.0) 54.0 (37.0 to 65.0)
Range 19 to 83 18 to 86 18 to 86
Sex
F 35 (44.9%) 37 (41.1%) 72 (42.9%)
M 43 (55.1%) 53 (58.9%) 96 (57.1%)
COVID_vaccination_Group
01_Non-infected/non-vaccinated 32 (41.0%) 20 (22.2%) 52 (31.0%)
02_Infected/non-vaccinated 46 (59.0%) 70 (77.8%) 116 (69.0%)

5.1.2 We compare all aPLs against each other, not individual ones

5.1.2.1 We compare infected versus non infected in the current dataset versus the previous dataset

We compare between old and new, for non-infected/non-vaccinated (control) and for infected/non-vaccinated groups. Is there a difference between old and new datasets?

# We first do the statistics for IgM
compare_means(aPL_signal ~ dataset, 
              data=subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                            (Isotype == 'IgM')),
              method = "wilcox", group.by = 'COVID_vaccination_Group', symnum.args=signifiance_stars, p.adjust.method = "BH")
## # A tibble: 2 × 9
##   COVID_vaccinatio…¹ .y.   group1 group2        p   p.adj p.for…² p.sig…³ method
##   <chr>              <chr> <chr>  <chr>     <dbl>   <dbl> <chr>   <chr>   <chr> 
## 1 02_Infected/non-v… aPL_… new    old    1.98e- 2 2  e- 2 0.02    ns      Wilco…
## 2 01_Non-infected/n… aPL_… new    old    2.64e-15 5.3e-15 2.6e-15 ****    Wilco…
## # … with abbreviated variable names ¹​COVID_vaccination_Group, ²​p.format,
## #   ³​p.signif
# We then do the statistics for IgG
compare_means(aPL_signal ~ dataset, 
              data=subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                            (Isotype == 'IgG')),
              method = "wilcox", group.by = 'COVID_vaccination_Group', symnum.args=signifiance_stars, p.adjust.method = "BH")
## # A tibble: 2 × 9
##   COVID_vaccinatio…¹ .y.   group1 group2        p   p.adj p.for…² p.sig…³ method
##   <chr>              <chr> <chr>  <chr>     <dbl>   <dbl> <chr>   <chr>   <chr> 
## 1 02_Infected/non-v… aPL_… new    old    4.99e- 2 5  e- 2 0.05    ns      Wilco…
## 2 01_Non-infected/n… aPL_… new    old    3.24e-18 6.5e-18 <2e-16  ****    Wilco…
## # … with abbreviated variable names ¹​COVID_vaccination_Group, ²​p.format,
## #   ³​p.signif

Observations:

  • The infected/non-vaccinated group is not statistically different between old and new (IgM).
  • The non-infected/non-vaccinated group is statistically significant between old and new (IgM).
  • The infected/non-vaccinated group is not statistically different between old and new (IgG).
  • The non-infected/non-vaccinated group is statistically significant between old and new (IgG).
# We visualise
ggplot(subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
  (Isotype == 'IgM' |
  Isotype == 'IgG')), aes(x = interaction(Isotype, COVID_vaccination_Group), y = as.numeric(aPL_signal))) +
  geom_boxplot(aes(color = dataset), outlier.shape = NA, add = "mean") +
  geom_point(shape=16, position=position_jitterdodge(jitter.width = 0.1), aes(color = dataset)) +
  geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
  stat_summary(fun = mean, geom = "point", shape = 18, size = 3,
               aes(group = interaction(Isotype, dataset, COVID_vaccination_Group)), color = "darkred",
               position = position_dodge(width = 0.8)) +
  stat_summary(fun = mean, colour = "red", 
               position = position_dodge(width = 0.8),
               geom = "text", vjust = -0.7, 
               aes(label = round(..y.., digits = 1), group = interaction(Isotype, dataset, COVID_vaccination_Group))) +
  scale_color_npg() +
  guides(x =  guide_axis(angle = 90)) +
  theme_classic()
## Warning in geom_boxplot(aes(color = dataset), outlier.shape = NA, add = "mean"):
## Ignoring unknown parameters: `add`

We conclude, here, that in terms of overall reactivity, the previously published (old) and the current (new) datasets are comparable as they do not display a statistically significant difference among the infected individuals. However, the baseline, i.e. aPL values measured in non-infected individuals, is different between the two datasets, with higher values in the old dataset on average.

5.1.2.2 We compare between non-infected/non-vaccinated and infected/non-vaccinated groups, for old and new and different isotypes

The question we pose here is already more interesting: Is there a difference between non-infected/non-vaccinated and infected/non-vaccinated groups?

# We first do the statistics for IgM
compare_means(aPL_signal ~ COVID_vaccination_Group, 
              data=subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                            (Isotype == 'IgM')),
              method = "wilcox", group.by = 'dataset', symnum.args=signifiance_stars, p.adjust.method = "BH")
## # A tibble: 2 × 9
##   dataset .y.        group1       group2        p   p.adj p.for…¹ p.sig…² method
##   <chr>   <chr>      <chr>        <chr>     <dbl>   <dbl> <chr>   <chr>   <chr> 
## 1 new     aPL_signal 02_Infected… 01_No… 4.59e-11 9.2e-11 4.6e-11 ****    Wilco…
## 2 old     aPL_signal 02_Infected… 01_No… 8.68e- 1 8.7e- 1 0.87    ns      Wilco…
## # … with abbreviated variable names ¹​p.format, ²​p.signif
# We then do the statistics for IgG
compare_means(aPL_signal ~ COVID_vaccination_Group, 
              data=subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                            (Isotype == 'IgG')),
              method = "wilcox", group.by = 'dataset', symnum.args=signifiance_stars, p.adjust.method = "BH")
## # A tibble: 2 × 9
##   dataset .y.        group1         group2       p  p.adj p.for…¹ p.sig…² method
##   <chr>   <chr>      <chr>          <chr>    <dbl>  <dbl> <chr>   <chr>   <chr> 
## 1 new     aPL_signal 02_Infected/n… 01_No… 1.95e-5 1.9e-5 1.9e-05 ***     Wilco…
## 2 old     aPL_signal 02_Infected/n… 01_No… 1.54e-5 1.9e-5 1.5e-05 ***     Wilco…
## # … with abbreviated variable names ¹​p.format, ²​p.signif

Observations:

  • The new but not the old dataset displays significant distributional differences in terms of IgM infected versus non-infected.
  • Both old and new datasets are significantly different in terms of IgG infected versus non-infected.
# We visualise
ggplot(subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                (Isotype == 'IgM' |
                   Isotype == 'IgG')), aes(x = interaction(Isotype, dataset), y = as.numeric(aPL_signal))) +
  geom_boxplot(aes(color = COVID_vaccination_Group), outlier.shape = NA, add = "mean") +
  geom_point(shape=16, position=position_jitterdodge(jitter.width = 0.1), aes(color = COVID_vaccination_Group)) +
  geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
  stat_summary(fun = mean, geom = "point", shape = 18, size = 3,
               aes(group = interaction(Isotype, dataset, COVID_vaccination_Group)), color = "darkred",
               position = position_dodge(width = 0.8)) +
  stat_summary(fun = mean, colour = "red", 
               position = position_dodge(width = 0.8),
               geom = "text", vjust = -0.7, 
               aes(label = round(..y.., digits = 1), group = interaction(Isotype, dataset, COVID_vaccination_Group))) +
  scale_color_manual(values=c(col2)) +
  guides(x =  guide_axis(angle = 90)) +
  theme_classic()
## Warning in geom_boxplot(aes(color = COVID_vaccination_Group), outlier.shape =
## NA, : Ignoring unknown parameters: `add`

ggplot(subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                (Isotype == 'IgM' |
                   Isotype == 'IgG')), aes(x = as.numeric(aPL_signal) , y = interaction(Isotype, dataset), fill=COVID_vaccination_Group)) +
  geom_density_ridges(
    aes(point_color = COVID_vaccination_Group, point_fill = COVID_vaccination_Group, scale = 0.5),
    alpha = .8, point_alpha = 1, jittered_points = TRUE) + #bandwith could be manually adjusted
  ggtitle('All aPL grouped by aPL types') +
  xlab('aPL signal') +
  ylab('aPL types') +
  scale_fill_manual(values=c(col2)) +
  scale_point_color_hue(l = 40) +
  ggplot2:::manual_scale("point_color", values = col2, guide = "none") +
  theme_ridges(grid = FALSE, center = TRUE) +
  geom_vline(xintercept=50, linetype="dashed", 
             color = "red", linewidth=1)
## Scale for point_colour is already present.
## Adding another scale for point_colour, which will replace the existing scale.
## Picking joint bandwidth of 3.35

Here, we conclude that in line with what we had observed in our previous study, we see that aPL are enriched in individuals following infection with SARS-CoV-2, both regarding IgM as well as IgG aPL.

5.1.3 We have focused on Prothrombin in the previous study - look at PT particularly, also include B2GPI and AnV

We have a particular interest in looking into Prothrombin aPL as PT has turned out to be the major finding in the previous study. Here, we follow the same approach as above when all aPL were included.

5.1.3.1 Old versus new

# We first do the statistics for IgM
compare_means(aPL_signal ~ dataset, 
              data=subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                          (Isotype == 'IgM') &
                          (Target == 'PT')),
              method = "wilcox", group.by = 'COVID_vaccination_Group', symnum.args=signifiance_stars, p.adjust.method = "BH")
## # A tibble: 2 × 9
##   COVID_vaccination_…¹ .y.   group1 group2       p  p.adj p.for…² p.sig…³ method
##   <chr>                <chr> <chr>  <chr>    <dbl>  <dbl> <chr>   <chr>   <chr> 
## 1 02_Infected/non-vac… aPL_… new    old    2.96e-4 5.9e-4 0.0003  **      Wilco…
## 2 01_Non-infected/non… aPL_… new    old    2.37e-2 2.4e-2 0.0237  ns      Wilco…
## # … with abbreviated variable names ¹​COVID_vaccination_Group, ²​p.format,
## #   ³​p.signif
# We then do the statistics for IgG
compare_means(aPL_signal ~ dataset, 
              data=subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                            (Isotype == 'IgG') &
                            (Target == 'PT')),
              method = "wilcox", group.by = 'COVID_vaccination_Group', symnum.args=signifiance_stars, p.adjust.method = "BH")
## # A tibble: 2 × 9
##   COVID_vaccination_…¹ .y.   group1 group2       p  p.adj p.for…² p.sig…³ method
##   <chr>                <chr> <chr>  <chr>    <dbl>  <dbl> <chr>   <chr>   <chr> 
## 1 02_Infected/non-vac… aPL_… new    old    0.00105 0.0021 0.0011  *       Wilco…
## 2 01_Non-infected/non… aPL_… new    old    0.147   0.15   0.1472  ns      Wilco…
## # … with abbreviated variable names ¹​COVID_vaccination_Group, ²​p.format,
## #   ³​p.signif

Observations:

  • New and old datasets differ for the infected/non-vaccinated group in PT levels (IgM and IgG).
  • New and old datasets do not differ for the non-infected/non-vaccinated group in PT levels (IgM and IgG).
# We visualise
ggplot(subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
              (Isotype == 'IgM' |
               Isotype == 'IgG') &
              (Target == 'PT')), 
       aes(x = interaction(Isotype, COVID_vaccination_Group), y = as.numeric(aPL_signal))) +
  geom_boxplot(aes(color = dataset), outlier.shape = NA, add = "mean") +
  geom_point(shape=16, position=position_jitterdodge(jitter.width = 0.1), aes(color = dataset)) +
  geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
  stat_summary(fun = mean, geom = "point", shape = 18, size = 3,
               aes(group = interaction(Isotype, dataset, COVID_vaccination_Group)), color = "darkred",
               position = position_dodge(width = 0.8)) +
  stat_summary(fun = mean, colour = "red", 
               position = position_dodge(width = 0.8),
               geom = "text", vjust = -0.7, 
               aes(label = round(..y.., digits = 1), group = interaction(Isotype, dataset, COVID_vaccination_Group))) +
  scale_color_npg() +
  guides(x =  guide_axis(angle = 90)) +
  theme_classic()
## Warning in geom_boxplot(aes(color = dataset), outlier.shape = NA, add = "mean"):
## Ignoring unknown parameters: `add`

We conclude that the PT measurements for both IgM and IgG yielded higher values in the old versus the new dataset throughout, in both infected and in non-infected individuals.

5.1.3.2 Statistics non-infected/non-vaccinated versus infected/non-vaccinated PT

# We first do the statistics for IgM
compare_means(aPL_signal ~ COVID_vaccination_Group, 
              data=subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                          (Isotype == 'IgM') &
                          (Target == 'PT')),
              method = "wilcox", group.by = 'dataset', symnum.args=signifiance_stars, p.adjust.method = "BH")
## # A tibble: 2 × 9
##   dataset .y.        group1         group2       p  p.adj p.for…¹ p.sig…² method
##   <chr>   <chr>      <chr>          <chr>    <dbl>  <dbl> <chr>   <chr>   <chr> 
## 1 new     aPL_signal 02_Infected/n… 01_No… 0.00251 0.0034 0.0025  *       Wilco…
## 2 old     aPL_signal 02_Infected/n… 01_No… 0.00335 0.0034 0.0034  *       Wilco…
## # … with abbreviated variable names ¹​p.format, ²​p.signif
# We then do the statistics for IgG
compare_means(aPL_signal ~ COVID_vaccination_Group, 
              data=subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                            (Isotype == 'IgG') &
                            (Target == 'PT')),
              method = "wilcox", group.by = 'dataset', symnum.args=signifiance_stars, p.adjust.method = "BH")
## # A tibble: 2 × 9
##   dataset .y.        group1           group2      p p.adj p.for…¹ p.sig…² method
##   <chr>   <chr>      <chr>            <chr>   <dbl> <dbl> <chr>   <chr>   <chr> 
## 1 new     aPL_signal 02_Infected/non… 01_No… 0.0705  0.14 0.071   ns      Wilco…
## 2 old     aPL_signal 02_Infected/non… 01_No… 0.753   0.75 0.753   ns      Wilco…
## # … with abbreviated variable names ¹​p.format, ²​p.signif

Observations:

  • Old and new dataset display significant difference infected versus non-infected PT (IgM).
  • Old and new dataset display no significant difference infected versus non-infected PT (IgG).
# We visualise
ggplot(subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
              (Isotype == 'IgM' |
               Isotype == 'IgG') &
              (Target == 'PT')), 
       aes(x = interaction(Isotype, dataset), y = as.numeric(aPL_signal))) +
  geom_boxplot(aes(color = COVID_vaccination_Group), outlier.shape = NA, add = "mean") +
  geom_point(shape=16, position=position_jitterdodge(jitter.width = 0.1), aes(color = COVID_vaccination_Group)) +
  geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
  stat_summary(fun = mean, geom = "point", shape = 18, size = 3,
               aes(group = interaction(Isotype, dataset, COVID_vaccination_Group)), color = "darkred",
               position = position_dodge(width = 0.8)) +
  stat_summary(fun = mean, colour = "red", 
               position = position_dodge(width = 0.8),
               geom = "text", vjust = -0.7, 
               aes(label = round(..y.., digits = 1), group = interaction(Isotype, dataset, COVID_vaccination_Group))) +
  scale_color_manual(values=c(col2)) +
  guides(x =  guide_axis(angle = 90)) +
  theme_classic()
## Warning in geom_boxplot(aes(color = COVID_vaccination_Group), outlier.shape =
## NA, : Ignoring unknown parameters: `add`

We conclude that, importantly, one of the principal findings from our previous study is validated: We observe a significant difference in infected versus non-infected individuals for PT IgM, but not for IgG, in the new dataset, as seen in the old already published dataset.

5.1.3.3 Statistics non-infected/non-vaccinated versus infected/non-vaccinated B2GPI

# We first do the statistics for IgM
compare_means(aPL_signal ~ COVID_vaccination_Group, 
              data=subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                          (Isotype == 'IgM') &
                          (Target == 'ß2GPI')),
              method = "wilcox", group.by = 'dataset', symnum.args=signifiance_stars,p.adjust.method = "BH")
## # A tibble: 2 × 9
##   dataset .y.        group1         group2       p  p.adj p.for…¹ p.sig…² method
##   <chr>   <chr>      <chr>          <chr>    <dbl>  <dbl> <chr>   <chr>   <chr> 
## 1 new     aPL_signal 02_Infected/n… 01_No… 4.40e-7 8.8e-7 4.4e-07 ****    Wilco…
## 2 old     aPL_signal 02_Infected/n… 01_No… 2.51e-3 2.5e-3 0.0025  *       Wilco…
## # … with abbreviated variable names ¹​p.format, ²​p.signif
# We then do the statistics for IgG
compare_means(aPL_signal ~ COVID_vaccination_Group, 
              data=subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                            (Isotype == 'IgG') &
                            (Target == 'ß2GPI')),
              method = "wilcox", group.by = 'dataset', symnum.args=signifiance_stars,p.adjust.method = "BH")
## # A tibble: 2 × 9
##   dataset .y.        group1         group2       p  p.adj p.for…¹ p.sig…² method
##   <chr>   <chr>      <chr>          <chr>    <dbl>  <dbl> <chr>   <chr>   <chr> 
## 1 new     aPL_signal 02_Infected/n… 01_No… 0.00161 0.0032 0.0016  *       Wilco…
## 2 old     aPL_signal 02_Infected/n… 01_No… 0.124   0.12   0.1244  ns      Wilco…
## # … with abbreviated variable names ¹​p.format, ²​p.signif

Observations:

  • Differences between infected and non-infected patients in IgG are slightly significant in the new, but not in the old dataset, for IgG.
  • Differences between infected and non-infected patients in IgM, in the new or old dataset, are significant, and the significance is more robust in the new dataset.
# We visualise
ggplot(subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
              (Isotype == 'IgM' |
               Isotype == 'IgG') &
              (Target == 'ß2GPI')), 
       aes(x = interaction(Isotype, dataset), y = as.numeric(aPL_signal))) +
  geom_boxplot(aes(color = COVID_vaccination_Group), outlier.shape = NA, add = "mean") +
  geom_point(shape=16, position=position_jitterdodge(jitter.width = 0.1), aes(color = COVID_vaccination_Group)) +
  geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
  stat_summary(fun = mean, geom = "point", shape = 18, size = 3,
               aes(group = interaction(Isotype, dataset, COVID_vaccination_Group)), color = "darkred",
               position = position_dodge(width = 0.8)) +
  stat_summary(fun = mean, colour = "red", 
               position = position_dodge(width = 0.8),
               geom = "text", vjust = -0.7, 
               aes(label = round(..y.., digits = 1), group = interaction(Isotype, dataset, COVID_vaccination_Group))) +
  scale_color_manual(values=c(col2)) +
  guides(x =  guide_axis(angle = 90)) +
  theme_classic()
## Warning in geom_boxplot(aes(color = COVID_vaccination_Group), outlier.shape =
## NA, : Ignoring unknown parameters: `add`

5.1.3.4 Statistics non-infected/non-vaccinated versus infected/non-vaccinated AnV

# We first do the statistics for IgM
compare_means(aPL_signal ~ COVID_vaccination_Group, 
              data=subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                          (Isotype == 'IgM') &
                          (Target == 'AnnV')),
              method = "wilcox", group.by = 'dataset', symnum.args=signifiance_stars, p.adjust.method = "BH")
## # A tibble: 2 × 9
##   dataset .y.        group1         group2       p  p.adj p.for…¹ p.sig…² method
##   <chr>   <chr>      <chr>          <chr>    <dbl>  <dbl> <chr>   <chr>   <chr> 
## 1 new     aPL_signal 02_Infected/n… 01_No… 4.28e-6 8.6e-6 4.3e-06 ****    Wilco…
## 2 old     aPL_signal 02_Infected/n… 01_No… 1.36e-1 1.4e-1 0.14    ns      Wilco…
## # … with abbreviated variable names ¹​p.format, ²​p.signif
# We then do the statistics for IgG
compare_means(aPL_signal ~ COVID_vaccination_Group, 
              data=subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
                            (Isotype == 'IgG') &
                            (Target == 'AnnV')),
              method = "wilcox", group.by = 'dataset', symnum.args=signifiance_stars, p.adjust.method = "BH")
## # A tibble: 2 × 9
##   dataset .y.        group1            group2     p p.adj p.for…¹ p.sig…² method
##   <chr>   <chr>      <chr>             <chr>  <dbl> <dbl> <chr>   <chr>   <chr> 
## 1 new     aPL_signal 02_Infected/non-… 01_No… 0.645  0.76 0.64    ns      Wilco…
## 2 old     aPL_signal 02_Infected/non-… 01_No… 0.762  0.76 0.76    ns      Wilco…
## # … with abbreviated variable names ¹​p.format, ²​p.signif

Observations:

  • Differences between infected and non-infected patients in IgG, in the new or old dataset, are not significant. This confirms previous results.
  • AnV, in our previous publication, was significantly different in infected versus non-infected individuals after binarisation of the dataset, with Fisher’s exact test. However, the distributions were not significantly different using Wilcoxon rank sum test after BH correction. In the new dataset, the distributions are significantly different also with Wilcoxon rank sum test. This confirms the previous observation that AnV is enriched in patients with/after infection with SARS-CoV-2.
# We visualise
ggplot(subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
              (Isotype == 'IgM' |
               Isotype == 'IgG') &
              (Target == 'AnnV')), 
       aes(x = interaction(Isotype, dataset), y = as.numeric(aPL_signal))) +
  geom_boxplot(aes(color = COVID_vaccination_Group), outlier.shape = NA, add = "mean") +
  geom_point(shape=16, position=position_jitterdodge(jitter.width = 0.1), aes(color = COVID_vaccination_Group)) +
  geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
  stat_summary(fun = mean, geom = "point", shape = 18, size = 3,
               aes(group = interaction(Isotype, dataset, COVID_vaccination_Group)), color = "darkred",
               position = position_dodge(width = 0.8)) +
  stat_summary(fun = mean, colour = "red", 
               position = position_dodge(width = 0.8),
               geom = "text", vjust = -0.7, 
               aes(label = round(..y.., digits = 1), group = interaction(Isotype, dataset, COVID_vaccination_Group))) +
  scale_color_manual(values=c(col2)) +
  guides(x =  guide_axis(angle = 90)) +
  theme_classic()
## Warning in geom_boxplot(aes(color = COVID_vaccination_Group), outlier.shape =
## NA, : Ignoring unknown parameters: `add`

# We visualise

comp_PT_B2GPI_AnV <-
ggplot(subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
              (Isotype == 'IgM' |
               Isotype == 'IgG') &
              (Target == 'PT' |
                Target == 'ß2GPI' |
                Target == 'AnnV')), 
       aes(x = interaction(Isotype, dataset), y = as.numeric(aPL_signal))) +
  geom_boxplot(aes(color = COVID_vaccination_Group), outlier.shape = NA, add = "mean") +
  geom_point(shape=16, position=position_jitterdodge(jitter.width = 0.1), aes(color = COVID_vaccination_Group)) +
  geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
  stat_summary(fun = mean, geom = "point", shape = 18, size = 3,
               aes(group = interaction(Isotype, dataset, COVID_vaccination_Group)), color = "darkred",
               position = position_dodge(width = 0.8)) +
  stat_summary(fun = mean, colour = "red", 
               position = position_dodge(width = 0.8),
               geom = "text", vjust = -0.7, 
               aes(label = round(..y.., digits = 1), group = interaction(Isotype, dataset, COVID_vaccination_Group))) +
  scale_color_manual(values=c(col2)) +
  guides(x =  guide_axis(angle = 90)) +
  facet_wrap(~Target,ncol=3) +
  theme(axis.text.y = element_text(size = 8)) +
  labs(y="aPL level",x="",title="Comparison infected versus non-infected in old and new datasets") +
  theme_classic()
comp_PT_B2GPI_AnV

comp_PT_B2GPI_AnV_hist <-
subset(published_data_comparison_NA, 
               (COVID_vaccination_Group == '01_Non-infected/non-vaccinated' |
                COVID_vaccination_Group == '02_Infected/non-vaccinated') &
              (Isotype == 'IgM' |
               Isotype == 'IgG') &
              (Target == 'PT' |
                Target == 'ß2GPI' |
                Target == 'AnnV')) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("AnnV"), "C.AnV")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("ß2GPI"), "B.ß2GPI")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("PT"), "A.PT")) %>%
  gghistogram(x = "aPL_signal", 
    y = "..density..",
    #add = "mean", 
    rug = FALSE,
    fill = "COVID_vaccination_Group", 
    palette = c(col2),
    #add_density = TRUE
    ) +
    facet_grid(rows=vars(Isotype), cols= vars(Target, dataset), scales='free_y', switch='y') +
    theme(axis.text.y = element_text(size = 8)) +
    labs(y="Density",x="aPL level",title="") +
    theme(legend.position = 'none')

ggarrange(Cohort_comparison_sexage, Cohort_comparison_DPO, Cohort_comparison_severity, Cohort_comparison_aPL, comp_PT_B2GPI_AnV_hist,
          labels = c("A", "B", 'C', 'D', 'E'),
          ncol = 2, nrow = 3)

5.2 Are they modulated by the strength of the antibody response against SARS-CoV-2 proteins?

5.2.1 We plot the overall aPL as a function of PC1_IgG and PC1_IgA.

# Variant 1
aPL_PC1_IgG <-
 ggplot(data=subset(data_vertical, CoV2_type == 'Spike_IgG'), aes(x = PC1_IgG, y = aPL_signal, color=Isotype)) +
  geom_point() +
  geom_smooth(method = "glm", 
              method.args = list(family = gaussian()), aes(colour=Isotype)) +
  stat_smooth(method = "glm", 
              method.args = list(family = gaussian()), aes(colour=Isotype), lty='dotted', geom='ribbon', fill=NA) +
  stat_cor(method='spearman') +
  geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
  scale_fill_manual(values=c(col4)) +
  scale_color_manual(values=c(col4)) +
  theme(axis.text.y = element_text(size = 8)) +
  labs(y="aPL signal",x="anti-Sars-CoV-2 PC1 IgG",title="all aPL versus SARS-CoV-2 PC1 IgG")
aPL_PC1_IgG
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 900 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 900 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 900 rows containing non-finite values (`stat_cor()`).
## Warning: Removed 900 rows containing missing values (`geom_point()`).

# Variant 2
ggscatter(
  data=subset(data_vertical, CoV2_type == 'Spike_IgG'), 
  x = "PC1_IgG", 
  y = "aPL_signal",
  color = "Isotype", 
  palette = c(col4),
  add = "reg.line",
  cor.method = 'spearman'
) +
  geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
  stat_cor(aes(color = Isotype),method='spearman') +
  labs(y="aPL signal",x="anti-Sars-CoV-2 PC1 IgG",title="all aPL versus SARS-CoV-2 PC1 IgG") +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 900 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 900 rows containing non-finite values (`stat_cor()`).
## Warning: Removed 900 rows containing missing values (`geom_point()`).

aPL_PC1_IgG
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 900 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 900 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 900 rows containing non-finite values (`stat_cor()`).
## Warning: Removed 900 rows containing missing values (`geom_point()`).

# Variant 1
aPL_PC1_IgA <-
 ggplot(data=subset(data_vertical, CoV2_type == 'Spike_IgG'), aes(x = PC1_IgA, y = aPL_signal, color=Isotype)) +
  geom_point() +
  geom_smooth(method = "glm", 
              method.args = list(family = gaussian()), aes(colour=Isotype)) +
  stat_smooth(method = "glm", 
              method.args = list(family = gaussian()), aes(colour=Isotype), lty='dotted', geom='ribbon', fill=NA) +
  stat_cor(method='spearman') +
  geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
  scale_fill_manual(values=c(col4)) +
  scale_color_manual(values=c(col4)) +
  theme(axis.text.y = element_text(size = 8)) +
  labs(y="aPL signal",x="anti-Sars-CoV-2 PC1 IgA",title="all aPL versus SARS-CoV-2 PC1 IgA")
aPL_PC1_IgA
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 900 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 900 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 900 rows containing non-finite values (`stat_cor()`).
## Warning: Removed 900 rows containing missing values (`geom_point()`).

# Variant 2
ggscatter(
  data=subset(data_vertical, CoV2_type == 'Spike_IgG'), 
  x = "PC1_IgA", 
  y = "aPL_signal",
  color = "Isotype", 
  palette = c(col4),
  add = "reg.line",
  cor.method = 'spearman'
) +
  geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
  stat_cor(aes(color = Isotype),method='spearman') +
  labs(y="aPL signal",x="anti-SARS-CoV-2 PC1 IgA",title="all aPL versus SARS-CoV-2 PC1 IgA") +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 900 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 900 rows containing non-finite values (`stat_cor()`).
## Warning: Removed 900 rows containing missing values (`geom_point()`).

aPL_PC1_IgA
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 900 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 900 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 900 rows containing non-finite values (`stat_cor()`).
## Warning: Removed 900 rows containing missing values (`geom_point()`).

Observations:

  • aPL IgG, IgA, and IgM, in general, do not show a linear correlation with anti-SARS-CoV-2 IgG.
  • aPL IgG, IgA, and IgM, in general, do not show a linear correlation with anti-SARS-CoV-2 IgA.

5.2.2 We plot PT, B2GPI, AnV, and CL specifically as a function of PC1_IgG and PC1_IGA.

# Variant 1
PTB2GPIAnV_PC1_IgG <-
subset(data_vertical, 
              CoV2_type == 'Spike_IgG' & 
              Target == 'PT' |
              Target == 'ß2GPI' |
              Target == 'AnnV' |
              Target == 'CL') %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("AnnV"), "C.AnV")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("ß2GPI"), "B.ß2GPI")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("PT"), "A.PT")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("CL"), "D.CL")) %>%  
 ggplot(aes(x = PC1_IgG, y = aPL_signal, color=Isotype)) +
  geom_point() +
  geom_smooth(method = "glm", 
              method.args = list(family = gaussian()), aes(colour=Isotype)) +
  stat_smooth(method = "glm", 
              method.args = list(family = gaussian()), aes(colour=Isotype), lty='dotted', geom='ribbon', fill=NA) +
  stat_cor(method='spearman') +
  geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
  scale_fill_manual(values=c(col4)) +
  scale_color_manual(values=c(col4)) +
  facet_wrap(~Target,ncol=4) +
  theme(axis.text.y = element_text(size = 8)) +
  labs(y="aPL signal",x="anti-Sars-CoV-2 PC1 IgG",title="PT, B2GPI, AnV, and CL aPL versus SARS-CoV-2 PC1 IgG")
PTB2GPIAnV_PC1_IgG
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 360 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 360 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 360 rows containing non-finite values (`stat_cor()`).
## Warning: Removed 360 rows containing missing values (`geom_point()`).

# Variant 2
subset(data_vertical, 
              CoV2_type == 'Spike_IgG' & 
              Target == 'PT' |
              Target == 'ß2GPI' |
              Target == 'AnnV' |
              Target == 'CL') %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("AnnV"), "C.AnV")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("ß2GPI"), "B.ß2GPI")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("PT"), "A.PT")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("CL"), "D.CL")) %>%  
  ggscatter( 
    x = "PC1_IgG", 
    y = "aPL_signal",
    color = "Isotype", 
    palette = c(col4),
    add = "reg.line",
    cor.method = 'spearman'
    ) +
    geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
    stat_cor(aes(color = Isotype), method='spearman') +
    facet_wrap(~Target,ncol=4) +
    labs(y="aPL signal",x="anti-Sars-CoV-2 PC1 IgG",title="PT, B2GPI, AnV, and CL aPL versus SARS-CoV-2 PC1 IgG") +
    theme_classic()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 360 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 360 rows containing non-finite values (`stat_cor()`).
## Warning: Removed 360 rows containing missing values (`geom_point()`).

# Variant 1
PTB2GPIAnV_PC1_IgA <-
subset(data_vertical, 
              CoV2_type == 'Spike_IgG' & 
              Target == 'PT' |
              Target == 'ß2GPI' |
              Target == 'AnnV' |
              Target == 'CL') %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("AnnV"), "C.AnV")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("ß2GPI"), "B.ß2GPI")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("PT"), "A.PT")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("CL"), "D.CL")) %>%  
 ggplot(aes(x = PC1_IgA, y = aPL_signal, color=Isotype)) +
  geom_point() +
  geom_smooth(method = "glm", 
              method.args = list(family = gaussian()), aes(colour=Isotype)) +
  stat_smooth(method = "glm", 
              method.args = list(family = gaussian()), aes(colour=Isotype), lty='dotted', geom='ribbon', fill=NA) +
  stat_cor(method='spearman') +
  geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
  scale_fill_manual(values=c(col4)) +
  scale_color_manual(values=c(col4)) +
  facet_wrap(~Target,ncol=4) +
  theme(axis.text.y = element_text(size = 8)) +
  labs(y="aPL signal",x="anti-Sars-CoV-2 PC1 IgA",title="PT, B2GPI, AnV, and CL aPL versus SARS-CoV-2 PC1 IgA")
PTB2GPIAnV_PC1_IgA
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 360 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 360 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 360 rows containing non-finite values (`stat_cor()`).
## Warning: Removed 360 rows containing missing values (`geom_point()`).

# Variant 2
subset(data_vertical, 
              CoV2_type == 'Spike_IgG' & 
              Target == 'PT' |
              Target == 'ß2GPI' |
              Target == 'AnnV' |
              Target == 'CL') %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("AnnV"), "C.AnV")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("ß2GPI"), "B.ß2GPI")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("PT"), "A.PT")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("CL"), "D.CL")) %>%  
  ggscatter( 
            x = "PC1_IgA", 
            y = "aPL_signal",
            color = "Isotype", 
            palette = c(col4),
            add = "reg.line",
            cor.method = 'spearman'
    ) +
    geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
    stat_cor(aes(color = Isotype),method='spearman') +
    facet_wrap(~Target,ncol=4) +
    labs(y="aPL signal",x="anti-Sars-CoV-2 PC1 IgA",title="PT, B2GPI, AnV, and CL aPL versus SARS-CoV-2 PC1 IgA") +
    theme_classic()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 360 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 360 rows containing non-finite values (`stat_cor()`).
## Warning: Removed 360 rows containing missing values (`geom_point()`).

ggarrange(aPL_PC1_IgG, aPL_PC1_IgA, PTB2GPIAnV_PC1_IgG, PTB2GPIAnV_PC1_IgA,
          labels = c("A", "B", 'C', 'D'),
          ncol = 2, nrow = 2)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 900 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 900 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 900 rows containing non-finite values (`stat_cor()`).
## Warning: Removed 900 rows containing missing values (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 900 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 900 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 900 rows containing non-finite values (`stat_cor()`).
## Warning: Removed 900 rows containing missing values (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 360 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 360 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 360 rows containing non-finite values (`stat_cor()`).
## Warning: Removed 360 rows containing missing values (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 360 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 360 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 360 rows containing non-finite values (`stat_cor()`).
## Warning: Removed 360 rows containing missing values (`geom_point()`).

5.2.3 We plot PT, B2GPI, and AnV specifically as a function of age and sex.

# Variant 1
subset(data_vertical, 
              CoV2_type == 'Spike_IgG' & 
              Target == 'PT' |
              Target == 'ß2GPI' |
              Target == 'AnnV' |
              Target == 'CL') %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("AnnV"), "C.AnV")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("ß2GPI"), "B.ß2GPI")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("PT"), "A.PT")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("CL"), "D.CL")) %>%  
 ggplot(aes(x = Age, y = aPL_signal, color=Isotype)) +
  geom_point() +
  geom_smooth(method = "glm", 
              method.args = list(family = gaussian()), aes(colour=Isotype)) +
  stat_smooth(method = "glm", 
              method.args = list(family = gaussian()), aes(colour=Isotype), lty='dotted', geom='ribbon', fill=NA) +
  stat_cor(method='spearman') +
  geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
  scale_fill_manual(values=c(col4)) +
  scale_color_manual(values=c(col4)) +
  facet_wrap(~Target,ncol=4) +
  theme(axis.text.y = element_text(size = 8)) +
  labs(y="aPL signal",x="anti-Sars-CoV-2 PC1 IgA",title="PT, B2GPI, and AnV aPL versus SARS-CoV-2 PC1 IgA")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 56 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 56 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 56 rows containing non-finite values (`stat_cor()`).
## Warning: Removed 56 rows containing missing values (`geom_point()`).

# Variant 2
subset(data_vertical, 
              CoV2_type == 'Spike_IgG' & 
              Target == 'PT' |
              Target == 'ß2GPI' |
              Target == 'AnnV' |
              Target == 'CL') %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("AnnV"), "C.AnV")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("ß2GPI"), "B.ß2GPI")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("PT"), "A.PT")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("CL"), "D.CL")) %>%  
  ggscatter(
  x = "Age", 
  y = "aPL_signal",
  color = "Isotype",
  shape='Sex',
  palette = c(col4),
  add = "reg.line",
  cor.method = 'spearman'
  ) +
  geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
  stat_cor(aes(color = Isotype), method='spearman') +
  facet_wrap(~Target,ncol=4) +
  labs(y="aPL signal",x="Age",title="PT, B2GPI, and AnV aPL versus Age") +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 56 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 56 rows containing non-finite values (`stat_cor()`).
## Warning: Removed 56 rows containing missing values (`geom_point()`).

5.3 Are there other factors that influence the aPL levels (in line or not with previous finding)?

We obviously focus on PT_IgM, B2GPI_IgM, AnV_IgM, and CL IgG.

5.3.1 We shape the data for regression

data_aPL_regr <- data %>% 
  dplyr::select(Unique_sample_ID_for_study,
                Thrombosis_group,
                CL_IgG:PT_IgA, aPL_IgG_rowmean:aPL_IgA_rowmean,
                Spike_IgG:NC_IgA, PC1_IgG, PC1_IgA,
                GCSF:Inflammatory_index,
                Sex, Age,
                SeverityCoV2_or_Flu, Acute_SARS_CoV_2_infection,
                Anticoagulation.at.event, Anticoagulation.chronic,
                Platelet.aggregation.inhibitor.at.event, Platelet.aggregation.inhibitor.chronic,
                Immunosuppressed_admission, Vaccination_statusonly_CoV2
  ) %>% 
  dplyr::mutate(Age=(Age-mean(Age))/sd(Age)
  ) %>%
  na.omit() # We remove those patients/samples with missing (NA) values. This has consequences for later and is annotated there as well.

data_aPL_regr$SeverityCoV2_or_Flu_int <- as.numeric(gsub("\\D", "", data_aPL_regr$SeverityCoV2_or_Flu)) # We want to use this as a score
data_aPL_regr$SeverityCoV2_or_Flu_int = as.numeric(data_aPL_regr$SeverityCoV2_or_Flu_int)

data_aPL_regr <- data_aPL_regr %>% 
  dplyr::select(-SeverityCoV2_or_Flu)

data_aPL_regr$Vaccination_statusonly_CoV2 = as.numeric(data_aPL_regr$Vaccination_statusonly_CoV2)
data_aPL_regr$Thrombosis_group = as.numeric(data_aPL_regr$Thrombosis_group)

data_aPL_regr_plot_PT_IgM <- data_aPL_regr %>% 
  dplyr::filter(PT_IgM > 1)

data_aPL_regr_plot_AnV_IgM <- data_aPL_regr %>% 
  dplyr::filter(AnnV_IgM > 1)

data_aPL_regr_plot_B2GPI_IgM <- data_aPL_regr %>% 
  dplyr::filter(ß2GPI_IgM > 1)

data_aPL_regr_plot_CL_IgG <- data_aPL_regr %>% 
  dplyr::filter(CL_IgG > 1)

5.3.2 We investigate the features using Boruta

We explain Boruta in depth in another chapter. Quintessentially, we conduct a random forest regression to identify which of the parameters are important for predicting the outcome, in this case PT, B2GPI, or AnV IgM.

set.seed(1)

# PT_IgM

boruta_PT_IgM <- Boruta(PT_IgM ~ PC1_IgG + PC1_IgA +
                          Spike_IgG + RBD_IgG + NC_IgG + 
                          Spike_IgA + RBD_IgA + NC_IgA +
                          GCSF + GMCSF + IFNalpha + IFNgamma + IL1beta + IL4 + IL6 + IL8 + IL10 + IL17A + IP10 + MIP1alpha +
                          MIP1beta + S100A8_A9 + SDF1alpha + TNFalpha + Inflammatory_index +
                          Acute_SARS_CoV_2_infection + 
                          Anticoagulation.at.event + 
                          Platelet.aggregation.inhibitor.at.event +
                          Immunosuppressed_admission +
                          Vaccination_statusonly_CoV2 +
                          SeverityCoV2_or_Flu_int + 
                          Sex + Age,
                        data=na.omit(data_aPL_regr), doTrace=2, maxRuns=1000)  # perform Boruta search

boruta_PT_IgM_signif <- names(boruta_PT_IgM$finalDecision[boruta_PT_IgM$finalDecision %in% c("Confirmed", "Tentative")])  # collect Confirmed and Tentative variables
print(boruta_PT_IgM_signif)  # significant variables
## [1] "Age"       "TNFalpha"  "MIP1alpha" "Spike_IgA" "NC_IgG"    "RBD_IgG"  
## [7] "PC1_IgG"
plot(boruta_PT_IgM, cex.axis=.5, las=2, xlab="", main="Variable Importance PT IgM all")  # plot variable importance

boruta_PT_IgM_02 <- Boruta(PT_IgM ~ PC1_IgG + PC1_IgA +
                          Spike_IgG + RBD_IgG + NC_IgG + 
                          Spike_IgA + RBD_IgA + NC_IgA +
                          GCSF + GMCSF + IFNalpha + IFNgamma + IL1beta + IL4 + IL6 + IL8 + IL10 + IL17A + IP10 + MIP1alpha +
                          MIP1beta + S100A8_A9 + SDF1alpha + TNFalpha + Inflammatory_index +
                          Acute_SARS_CoV_2_infection + 
                          Anticoagulation.at.event + 
                          Platelet.aggregation.inhibitor.at.event +
                          Immunosuppressed_admission +
                          Vaccination_statusonly_CoV2 +
                          SeverityCoV2_or_Flu_int + 
                          Sex + Age,
                        data=na.omit(data_aPL_regr_plot_PT_IgM), doTrace=2, maxRuns=1000)  # perform Boruta search

boruta_PT_IgM_signif_02 <- names(boruta_PT_IgM_02$finalDecision[boruta_PT_IgM_02$finalDecision %in% c("Confirmed", "Tentative")])  # collect Confirmed and Tentative variables
print(boruta_PT_IgM_signif_02)  # significant variables
## [1] "SeverityCoV2_or_Flu_int"     "Vaccination_statusonly_CoV2"
## [3] "Acute_SARS_CoV_2_infection"  "TNFalpha"                   
## [5] "S100A8_A9"                   "IP10"                       
## [7] "IL8"                         "NC_IgG"                     
## [9] "PC1_IgG"
plot(boruta_PT_IgM_02, cex.axis=.5, las=2, xlab="", main="Variable Importance PT IgM >1")  # plot variable importance

# B2GPI_IgM

boruta_ß2GPI_IgM <- Boruta(ß2GPI_IgM ~ PC1_IgG + PC1_IgA +
                          Spike_IgG + RBD_IgG + NC_IgG + 
                          Spike_IgA + RBD_IgA + NC_IgA +
                          GCSF + GMCSF + IFNalpha + IFNgamma + IL1beta + IL4 + IL6 + IL8 + IL10 + IL17A + IP10 + MIP1alpha +
                          MIP1beta + S100A8_A9 + SDF1alpha + TNFalpha + Inflammatory_index +
                          Acute_SARS_CoV_2_infection + 
                          Anticoagulation.at.event + 
                          Platelet.aggregation.inhibitor.at.event +
                          Immunosuppressed_admission +
                          Vaccination_statusonly_CoV2 +
                          SeverityCoV2_or_Flu_int + 
                          Sex + Age,
                        data=na.omit(data_aPL_regr), doTrace=2, maxRuns=1000)  # perform Boruta search

boruta_ß2GPI_IgM_signif <- names(boruta_ß2GPI_IgM$finalDecision[boruta_ß2GPI_IgM$finalDecision %in% c("Confirmed", "Tentative")])  # collect Confirmed and Tentative variables
print(boruta_ß2GPI_IgM_signif)  # significant variables
## [1] "Age"       "TNFalpha"  "MIP1alpha" "NC_IgA"    "RBD_IgA"   "Spike_IgA"
## [7] "PC1_IgG"
plot(boruta_ß2GPI_IgM, cex.axis=.5, las=2, xlab="", main="Variable Importance ß2GPI IgM all")  # plot variable importance

boruta_ß2GPI_IgM_02 <- Boruta(ß2GPI_IgM ~ PC1_IgG + PC1_IgA +
                          Spike_IgG + RBD_IgG + NC_IgG + 
                          Spike_IgA + RBD_IgA + NC_IgA +
                          GCSF + GMCSF + IFNalpha + IFNgamma + IL1beta + IL4 + IL6 + IL8 + IL10 + IL17A + IP10 + MIP1alpha +
                          MIP1beta + S100A8_A9 + SDF1alpha + TNFalpha + Inflammatory_index +
                          Acute_SARS_CoV_2_infection + 
                          Anticoagulation.at.event + 
                          Platelet.aggregation.inhibitor.at.event +
                          Immunosuppressed_admission +
                          Vaccination_statusonly_CoV2 +
                          SeverityCoV2_or_Flu_int + 
                          Sex + Age,
                        data=na.omit(data_aPL_regr_plot_B2GPI_IgM), doTrace=2, maxRuns=1000)  # perform Boruta search

boruta_ß2GPI_IgM_signif_02 <- names(boruta_ß2GPI_IgM_02$finalDecision[boruta_ß2GPI_IgM_02$finalDecision %in% c("Confirmed", "Tentative")])  # collect Confirmed and Tentative variables
print(boruta_ß2GPI_IgM_signif_02)  # significant variables
## [1] "Age"                     "SeverityCoV2_or_Flu_int"
## [3] "TNFalpha"                "SDF1alpha"              
## [5] "IL17A"                   "IL4"                    
## [7] "NC_IgA"                  "RBD_IgA"                
## [9] "NC_IgG"
plot(boruta_ß2GPI_IgM_02, cex.axis=.5, las=2, xlab="", main="Variable Importance ß2GPI IgM >1")  # plot variable importance

# AnV_IgM

boruta_AnnV_IgM <- Boruta(AnnV_IgM ~ PC1_IgG + PC1_IgA +
                          Spike_IgG + RBD_IgG + NC_IgG + 
                          Spike_IgA + RBD_IgA + NC_IgA +
                          GCSF + GMCSF + IFNalpha + IFNgamma + IL1beta + IL4 + IL6 + IL8 + IL10 + IL17A + IP10 + MIP1alpha +
                          MIP1beta + S100A8_A9 + SDF1alpha + TNFalpha + Inflammatory_index +
                          Acute_SARS_CoV_2_infection + 
                          Anticoagulation.at.event + 
                          Platelet.aggregation.inhibitor.at.event +
                          Immunosuppressed_admission +
                          Vaccination_statusonly_CoV2 +
                          SeverityCoV2_or_Flu_int + 
                          Sex + Age,
                      data=na.omit(data_aPL_regr), doTrace=2, maxRuns=1000)  # perform Boruta search

boruta_AnnV_IgM_signif <- names(boruta_AnnV_IgM$finalDecision[boruta_AnnV_IgM$finalDecision %in% c("Confirmed", "Tentative")])  # collect Confirmed and Tentative variables
print(boruta_AnnV_IgM_signif)  # significant variables
##  [1] "Age"                         "SeverityCoV2_or_Flu_int"    
##  [3] "Vaccination_statusonly_CoV2" "Acute_SARS_CoV_2_infection" 
##  [5] "TNFalpha"                    "IL17A"                      
##  [7] "RBD_IgA"                     "NC_IgG"                     
##  [9] "PC1_IgA"                     "PC1_IgG"
plot(boruta_AnnV_IgM, cex.axis=.5, las=2, xlab="", main="Variable Importance AnV IgM all")  # plot variable importance

boruta_AnnV_IgM_02 <- Boruta(AnnV_IgM ~ PC1_IgG + PC1_IgA +
                          Spike_IgG + RBD_IgG + NC_IgG + 
                          Spike_IgA + RBD_IgA + NC_IgA +
                          GCSF + GMCSF + IFNalpha + IFNgamma + IL1beta + IL4 + IL6 + IL8 + IL10 + IL17A + IP10 + MIP1alpha +
                          MIP1beta + S100A8_A9 + SDF1alpha + TNFalpha + Inflammatory_index +
                          Acute_SARS_CoV_2_infection + 
                          Anticoagulation.at.event + 
                          Platelet.aggregation.inhibitor.at.event +
                          Immunosuppressed_admission +
                          Vaccination_statusonly_CoV2 +
                          SeverityCoV2_or_Flu_int + 
                          Sex + Age,
                      data=na.omit(data_aPL_regr_plot_AnV_IgM), doTrace=2, maxRuns=1000)  # perform Boruta search

boruta_AnnV_IgM_signif_02 <- names(boruta_AnnV_IgM_02$finalDecision[boruta_AnnV_IgM_02$finalDecision %in% c("Confirmed", "Tentative")])  # collect Confirmed and Tentative variables
print(boruta_AnnV_IgM_signif_02)  # significant variables
## [1] "Age"                        "SeverityCoV2_or_Flu_int"   
## [3] "Acute_SARS_CoV_2_infection" "TNFalpha"                  
## [5] "IL17A"                      "IL4"                       
## [7] "NC_IgG"                     "PC1_IgG"
plot(boruta_AnnV_IgM_02, cex.axis=.5, las=2, xlab="", main="Variable Importance AnV IgM >1")  # plot variable importance

# CL_IgG

boruta_CL_IgG <- Boruta(CL_IgG ~ PC1_IgG + PC1_IgA +
                            Spike_IgG + RBD_IgG + NC_IgG + 
                            Spike_IgA + RBD_IgA + NC_IgA +
                            GCSF + GMCSF + IFNalpha + IFNgamma + IL1beta + IL4 + IL6 + IL8 + IL10 + IL17A + IP10 + MIP1alpha +
                            MIP1beta + S100A8_A9 + SDF1alpha + TNFalpha + Inflammatory_index +
                            Acute_SARS_CoV_2_infection + 
                            Anticoagulation.at.event + 
                            Platelet.aggregation.inhibitor.at.event +
                            Immunosuppressed_admission +
                            Vaccination_statusonly_CoV2 +
                            SeverityCoV2_or_Flu_int + 
                            Sex + Age,
                          data=na.omit(data_aPL_regr), doTrace=2, maxRuns=1000)  # perform Boruta search

boruta_CL_IgG_signif <- names(boruta_CL_IgG$finalDecision[boruta_CL_IgG$finalDecision %in% c("Confirmed", "Tentative")])  # collect Confirmed and Tentative variables
print(boruta_CL_IgG_signif)  # significant variables
## [1] "IL8"       "IL4"       "RBD_IgA"   "Spike_IgA" "NC_IgG"    "PC1_IgA"  
## [7] "PC1_IgG"
plot(boruta_CL_IgG, cex.axis=.5, las=2, xlab="", main="Variable Importance CL IgG all")  # plot variable importance

boruta_CL_IgG_02 <- Boruta(CL_IgG ~ PC1_IgG + PC1_IgA +
                               Spike_IgG + RBD_IgG + NC_IgG + 
                               Spike_IgA + RBD_IgA + NC_IgA +
                               GCSF + GMCSF + IFNalpha + IFNgamma + IL1beta + IL4 + IL6 + IL8 + IL10 + IL17A + IP10 + MIP1alpha +
                               MIP1beta + S100A8_A9 + SDF1alpha + TNFalpha + Inflammatory_index +
                               Acute_SARS_CoV_2_infection + 
                               Anticoagulation.at.event + 
                               Platelet.aggregation.inhibitor.at.event +
                               Immunosuppressed_admission +
                               Vaccination_statusonly_CoV2 +
                               SeverityCoV2_or_Flu_int + 
                               Sex + Age,
                             data=na.omit(data_aPL_regr_plot_CL_IgG), doTrace=2, maxRuns=1000)  # perform Boruta search

boruta_CL_IgG_signif_02 <- names(boruta_CL_IgG_02$finalDecision[boruta_CL_IgG_02$finalDecision %in% c("Confirmed", "Tentative")])  # collect Confirmed and Tentative variables
print(boruta_CL_IgG_signif_02)  # significant variables
## [1] "Age"       "SDF1alpha" "MIP1alpha"
plot(boruta_CL_IgG_02, cex.axis=.5, las=2, xlab="", main="Variable Importance CL IgG >1")  # plot variable importance

Conclusions:

Overall, we observe that mostly antibodies against SARS-CoV-2 are associated, with disease severity, acute infection, age, and some cytokines, particularly TNF alpha, being modulators. The link between aPL and TNF alpha has been described before:

5.3.3 We build a model and assess it for its predictive value

  1. We build models incorporating most of the features found important, both for all values as well as for those only including patients with respective values > 1;
  2. We run a stepAIC to simplify the model to include the most important parameters;
  3. We compare model predictions with original outcomes.
  4. For this model, we use the dataset where datapoints <1 are removed. Importantly, we seek to find associations with respective aPL levels, not with absence of levels.
# For the aPL, we remove data points <1; hence, we try to model associations WITH aPL levels!

# PT model

PT_IgM_model1 <- glm(PT_IgM ~ NC_IgG + PC1_IgG +
                       SeverityCoV2_or_Flu_int +
                       Vaccination_statusonly_CoV2 + Acute_SARS_CoV_2_infection +
                       TNFalpha + S100A8_A9 + IP10 + IL8 + Age + MIP1alpha + Spike_IgA + RBD_IgG,
                     data = data_aPL_regr_plot_PT_IgM, 
                     family = gaussian(link = 'identity'))

PT_IgM_model1_AIC <- stepAIC(PT_IgM_model1, trace=TRUE)
## Start:  AIC=386.37
## PT_IgM ~ NC_IgG + PC1_IgG + SeverityCoV2_or_Flu_int + Vaccination_statusonly_CoV2 + 
##     Acute_SARS_CoV_2_infection + TNFalpha + S100A8_A9 + IP10 + 
##     IL8 + Age + MIP1alpha + Spike_IgA + RBD_IgG
## 
##                               Df Deviance    AIC
## - Spike_IgA                    1   5407.2 384.41
## - Vaccination_statusonly_CoV2  1   5422.0 384.54
## - Acute_SARS_CoV_2_infection   1   5428.8 384.60
## - SeverityCoV2_or_Flu_int      1   5453.2 384.81
## - S100A8_A9                    1   5492.4 385.15
## - TNFalpha                     1   5492.7 385.15
## - RBD_IgG                      1   5497.7 385.19
## - PC1_IgG                      1   5536.4 385.52
## - IL8                          1   5540.3 385.55
## - IP10                         1   5559.3 385.71
## - Age                          1   5589.2 385.97
## <none>                             5402.6 386.37
## - MIP1alpha                    1   5724.9 387.09
## - NC_IgG                       1   5972.6 389.09
## 
## Step:  AIC=384.41
## PT_IgM ~ NC_IgG + PC1_IgG + SeverityCoV2_or_Flu_int + Vaccination_statusonly_CoV2 + 
##     Acute_SARS_CoV_2_infection + TNFalpha + S100A8_A9 + IP10 + 
##     IL8 + Age + MIP1alpha + RBD_IgG
## 
##                               Df Deviance    AIC
## - Vaccination_statusonly_CoV2  1   5429.3 382.60
## - Acute_SARS_CoV_2_infection   1   5436.7 382.67
## - SeverityCoV2_or_Flu_int      1   5463.6 382.90
## - TNFalpha                     1   5493.9 383.16
## - S100A8_A9                    1   5500.4 383.21
## - RBD_IgG                      1   5519.0 383.37
## - IP10                         1   5578.7 383.88
## - PC1_IgG                      1   5580.7 383.90
## - Age                          1   5593.3 384.00
## <none>                             5407.2 384.41
## - IL8                          1   5666.2 384.61
## - MIP1alpha                    1   5738.1 385.20
## - NC_IgG                       1   6041.6 387.63
## 
## Step:  AIC=382.6
## PT_IgM ~ NC_IgG + PC1_IgG + SeverityCoV2_or_Flu_int + Acute_SARS_CoV_2_infection + 
##     TNFalpha + S100A8_A9 + IP10 + IL8 + Age + MIP1alpha + RBD_IgG
## 
##                              Df Deviance    AIC
## - Acute_SARS_CoV_2_infection  1   5452.6 380.80
## - S100A8_A9                   1   5502.0 381.23
## - SeverityCoV2_or_Flu_int     1   5507.6 381.28
## - TNFalpha                    1   5517.7 381.36
## - RBD_IgG                     1   5523.9 381.42
## - PC1_IgG                     1   5581.2 381.90
## - IP10                        1   5591.3 381.98
## - Age                         1   5616.8 382.20
## <none>                            5429.3 382.60
## - IL8                         1   5703.4 382.92
## - MIP1alpha                   1   5747.3 383.28
## - NC_IgG                      1   6057.1 385.75
## 
## Step:  AIC=380.8
## PT_IgM ~ NC_IgG + PC1_IgG + SeverityCoV2_or_Flu_int + TNFalpha + 
##     S100A8_A9 + IP10 + IL8 + Age + MIP1alpha + RBD_IgG
## 
##                           Df Deviance    AIC
## - S100A8_A9                1   5517.8 379.36
## - TNFalpha                 1   5528.9 379.46
## - SeverityCoV2_or_Flu_int  1   5540.0 379.55
## - RBD_IgG                  1   5595.7 380.02
## - IP10                     1   5615.7 380.19
## - PC1_IgG                  1   5673.0 380.67
## <none>                         5452.6 380.80
## - Age                      1   5692.6 380.83
## - IL8                      1   5724.7 381.09
## - MIP1alpha                1   5765.0 381.42
## - NC_IgG                   1   6271.6 385.38
## 
## Step:  AIC=379.36
## PT_IgM ~ NC_IgG + PC1_IgG + SeverityCoV2_or_Flu_int + TNFalpha + 
##     IP10 + IL8 + Age + MIP1alpha + RBD_IgG
## 
##                           Df Deviance    AIC
## - TNFalpha                 1   5586.9 377.95
## - SeverityCoV2_or_Flu_int  1   5601.1 378.07
## - RBD_IgG                  1   5672.2 378.66
## - IP10                     1   5700.2 378.89
## <none>                         5517.8 379.36
## - Age                      1   5781.9 379.56
## - IL8                      1   5793.1 379.65
## - PC1_IgG                  1   5812.9 379.81
## - MIP1alpha                1   5914.0 380.62
## - NC_IgG                   1   6539.3 385.35
## 
## Step:  AIC=377.95
## PT_IgM ~ NC_IgG + PC1_IgG + SeverityCoV2_or_Flu_int + IP10 + 
##     IL8 + Age + MIP1alpha + RBD_IgG
## 
##                           Df Deviance    AIC
## - SeverityCoV2_or_Flu_int  1   5629.6 376.31
## - IP10                     1   5716.2 377.02
## - RBD_IgG                  1   5733.5 377.17
## <none>                         5586.9 377.95
## - IL8                      1   5838.3 378.02
## - PC1_IgG                  1   5859.7 378.19
## - Age                      1   5881.6 378.36
## - MIP1alpha                1   5970.9 379.07
## - NC_IgG                   1   6590.3 383.71
## 
## Step:  AIC=376.31
## PT_IgM ~ NC_IgG + PC1_IgG + IP10 + IL8 + Age + MIP1alpha + RBD_IgG
## 
##             Df Deviance    AIC
## - RBD_IgG    1   5759.7 375.38
## - IP10       1   5779.9 375.54
## - PC1_IgG    1   5870.7 376.28
## <none>           5629.6 376.31
## - IL8        1   5913.4 376.62
## - Age        1   5929.7 376.75
## - MIP1alpha  1   5971.4 377.08
## - NC_IgG     1   6657.5 382.19
## 
## Step:  AIC=375.38
## PT_IgM ~ NC_IgG + PC1_IgG + IP10 + IL8 + Age + MIP1alpha
## 
##             Df Deviance    AIC
## - IP10       1   5846.6 374.08
## - PC1_IgG    1   5940.4 374.83
## <none>           5759.7 375.38
## - IL8        1   6089.9 376.00
## - Age        1   6166.4 376.59
## - MIP1alpha  1   6175.4 376.65
## - NC_IgG     1   7151.2 383.55
## 
## Step:  AIC=374.08
## PT_IgM ~ NC_IgG + PC1_IgG + IL8 + Age + MIP1alpha
## 
##             Df Deviance    AIC
## - PC1_IgG    1   5974.5 373.10
## <none>           5846.6 374.08
## - IL8        1   6189.9 374.77
## - MIP1alpha  1   6260.9 375.30
## - Age        1   6436.6 376.60
## - NC_IgG     1   7153.4 381.56
## 
## Step:  AIC=373.1
## PT_IgM ~ NC_IgG + IL8 + Age + MIP1alpha
## 
##             Df Deviance    AIC
## <none>           5974.5 373.10
## - IL8        1   6345.2 373.93
## - MIP1alpha  1   6519.4 375.20
## - Age        1   6798.1 377.17
## - NC_IgG     1   8909.4 389.88
# B2GPI model

ß2GPI_IgM_model1 <- glm(ß2GPI_IgM ~ NC_IgG +
                          NC_IgA + RBD_IgA +
                          SeverityCoV2_or_Flu_int +
                          Vaccination_statusonly_CoV2 +
                          Age +
                          IL4 + TNFalpha + SDF1alpha + IL17A + Age + MIP1alpha + PC1_IgG,
                        data = data_aPL_regr_plot_B2GPI_IgM, 
                        family = gaussian(link = 'identity'))

ß2GPI_IgM_model1_AIC <- stepAIC(ß2GPI_IgM_model1, trace=TRUE)
## Start:  AIC=708.47
## ß2GPI_IgM ~ NC_IgG + NC_IgA + RBD_IgA + SeverityCoV2_or_Flu_int + 
##     Vaccination_statusonly_CoV2 + Age + IL4 + TNFalpha + SDF1alpha + 
##     IL17A + Age + MIP1alpha + PC1_IgG
## 
##                               Df Deviance    AIC
## - PC1_IgG                      1    11757 706.48
## - Age                          1    11768 706.56
## - TNFalpha                     1    11834 707.05
## - RBD_IgA                      1    11868 707.31
## - NC_IgG                       1    11891 707.48
## - MIP1alpha                    1    11912 707.64
## - SeverityCoV2_or_Flu_int      1    12000 708.29
## <none>                              11756 708.47
## - IL17A                        1    12027 708.48
## - Vaccination_statusonly_CoV2  1    12044 708.61
## - NC_IgA                       1    12119 709.15
## - SDF1alpha                    1    12273 710.26
## - IL4                          1    12954 715.01
## 
## Step:  AIC=706.48
## ß2GPI_IgM ~ NC_IgG + NC_IgA + RBD_IgA + SeverityCoV2_or_Flu_int + 
##     Vaccination_statusonly_CoV2 + Age + IL4 + TNFalpha + SDF1alpha + 
##     IL17A + MIP1alpha
## 
##                               Df Deviance    AIC
## - Age                          1    11769 704.57
## - TNFalpha                     1    11834 705.05
## - RBD_IgA                      1    11874 705.36
## - MIP1alpha                    1    11922 705.70
## - SeverityCoV2_or_Flu_int      1    12001 706.29
## <none>                              11757 706.48
## - IL17A                        1    12036 706.55
## - Vaccination_statusonly_CoV2  1    12109 707.08
## - NC_IgG                       1    12139 707.30
## - NC_IgA                       1    12141 707.31
## - SDF1alpha                    1    12294 708.41
## - IL4                          1    13027 713.51
## 
## Step:  AIC=704.57
## ß2GPI_IgM ~ NC_IgG + NC_IgA + RBD_IgA + SeverityCoV2_or_Flu_int + 
##     Vaccination_statusonly_CoV2 + IL4 + TNFalpha + SDF1alpha + 
##     IL17A + MIP1alpha
## 
##                               Df Deviance    AIC
## - TNFalpha                     1    11840 703.10
## - RBD_IgA                      1    11886 703.44
## - MIP1alpha                    1    11936 703.81
## - SeverityCoV2_or_Flu_int      1    12030 704.50
## <none>                              11769 704.57
## - IL17A                        1    12041 704.58
## - NC_IgG                       1    12140 705.30
## - Vaccination_statusonly_CoV2  1    12179 705.59
## - NC_IgA                       1    12208 705.79
## - SDF1alpha                    1    12322 706.61
## - IL4                          1    13028 711.51
## 
## Step:  AIC=703.1
## ß2GPI_IgM ~ NC_IgG + NC_IgA + RBD_IgA + SeverityCoV2_or_Flu_int + 
##     Vaccination_statusonly_CoV2 + IL4 + SDF1alpha + IL17A + MIP1alpha
## 
##                               Df Deviance    AIC
## - RBD_IgA                      1    11951 701.92
## - MIP1alpha                    1    11974 702.09
## - IL17A                        1    12049 702.64
## - SeverityCoV2_or_Flu_int      1    12102 703.03
## <none>                              11840 703.10
## - NC_IgG                       1    12188 703.65
## - Vaccination_statusonly_CoV2  1    12207 703.79
## - NC_IgA                       1    12301 704.46
## - SDF1alpha                    1    12411 705.24
## - IL4                          1    13050 709.66
## 
## Step:  AIC=701.92
## ß2GPI_IgM ~ NC_IgG + NC_IgA + SeverityCoV2_or_Flu_int + Vaccination_statusonly_CoV2 + 
##     IL4 + SDF1alpha + IL17A + MIP1alpha
## 
##                               Df Deviance    AIC
## - MIP1alpha                    1    12095 700.98
## - IL17A                        1    12130 701.23
## - SeverityCoV2_or_Flu_int      1    12162 701.47
## <none>                              11951 701.92
## - Vaccination_statusonly_CoV2  1    12273 702.26
## - NC_IgA                       1    12440 703.45
## - SDF1alpha                    1    12500 703.88
## - NC_IgG                       1    12547 704.21
## - IL4                          1    13369 709.79
## 
## Step:  AIC=700.98
## ß2GPI_IgM ~ NC_IgG + NC_IgA + SeverityCoV2_or_Flu_int + Vaccination_statusonly_CoV2 + 
##     IL4 + SDF1alpha + IL17A
## 
##                               Df Deviance    AIC
## - IL17A                        1    12296 700.43
## - Vaccination_statusonly_CoV2  1    12358 700.87
## - SeverityCoV2_or_Flu_int      1    12360 700.88
## <none>                              12095 700.98
## - SDF1alpha                    1    12565 702.33
## - NC_IgA                       1    12599 702.57
## - NC_IgG                       1    12755 703.65
## - IL4                          1    13450 708.32
## 
## Step:  AIC=700.43
## ß2GPI_IgM ~ NC_IgG + NC_IgA + SeverityCoV2_or_Flu_int + Vaccination_statusonly_CoV2 + 
##     IL4 + SDF1alpha
## 
##                               Df Deviance    AIC
## - Vaccination_statusonly_CoV2  1    12517 699.99
## - SeverityCoV2_or_Flu_int      1    12560 700.30
## <none>                              12296 700.43
## - NC_IgA                       1    12688 701.19
## - SDF1alpha                    1    12740 701.55
## - NC_IgG                       1    13047 703.64
## - IL4                          1    14731 714.32
## 
## Step:  AIC=699.99
## ß2GPI_IgM ~ NC_IgG + NC_IgA + SeverityCoV2_or_Flu_int + IL4 + 
##     SDF1alpha
## 
##                           Df Deviance    AIC
## - SeverityCoV2_or_Flu_int  1    12609 698.64
## <none>                          12517 699.99
## - NC_IgA                   1    12900 700.65
## - NC_IgG                   1    13232 702.89
## - SDF1alpha                1    13595 705.26
## - IL4                      1    15146 714.77
## 
## Step:  AIC=698.64
## ß2GPI_IgM ~ NC_IgG + NC_IgA + IL4 + SDF1alpha
## 
##             Df Deviance    AIC
## <none>            12609 698.64
## - NC_IgA     1    12959 699.05
## - NC_IgG     1    13300 701.33
## - SDF1alpha  1    13646 703.59
## - IL4        1    15148 712.78
#AnV model

AnV_IgM_model1 <- glm(AnnV_IgM ~ NC_IgG + PC1_IgG +
                        RBD_IgA + PC1_IgA +
                        SeverityCoV2_or_Flu_int +
                        Acute_SARS_CoV_2_infection +
                        Age +
                        TNFalpha + IL17A + Vaccination_statusonly_CoV2 + IL4,
                      data = data_aPL_regr_plot_AnV_IgM, 
                      family = gaussian(link = 'identity'))

AnV_IgM_model1_AIC <- stepAIC(AnV_IgM_model1, trace=TRUE)
## Start:  AIC=656.45
## AnnV_IgM ~ NC_IgG + PC1_IgG + RBD_IgA + PC1_IgA + SeverityCoV2_or_Flu_int + 
##     Acute_SARS_CoV_2_infection + Age + TNFalpha + IL17A + Vaccination_statusonly_CoV2 + 
##     IL4
## 
##                               Df Deviance    AIC
## - RBD_IgA                      1    17838 654.51
## - Acute_SARS_CoV_2_infection   1    17858 654.60
## - SeverityCoV2_or_Flu_int      1    17886 654.72
## - NC_IgG                       1    17900 654.78
## - PC1_IgG                      1    17907 654.81
## - Vaccination_statusonly_CoV2  1    17922 654.87
## - PC1_IgA                      1    17932 654.91
## - IL4                          1    18147 655.82
## - TNFalpha                     1    18154 655.85
## <none>                              17822 656.45
## - IL17A                        1    18634 657.83
## - Age                          1    20338 664.48
## 
## Step:  AIC=654.51
## AnnV_IgM ~ NC_IgG + PC1_IgG + PC1_IgA + SeverityCoV2_or_Flu_int + 
##     Acute_SARS_CoV_2_infection + Age + TNFalpha + IL17A + Vaccination_statusonly_CoV2 + 
##     IL4
## 
##                               Df Deviance    AIC
## - Acute_SARS_CoV_2_infection   1    17879 652.69
## - SeverityCoV2_or_Flu_int      1    17891 652.74
## - NC_IgG                       1    17918 652.85
## - PC1_IgG                      1    17928 652.90
## - Vaccination_statusonly_CoV2  1    17950 652.99
## - IL4                          1    18154 653.85
## - TNFalpha                     1    18169 653.91
## - PC1_IgA                      1    18294 654.43
## <none>                              17838 654.51
## - IL17A                        1    18680 656.02
## - Age                          1    20368 662.59
## 
## Step:  AIC=652.69
## AnnV_IgM ~ NC_IgG + PC1_IgG + PC1_IgA + SeverityCoV2_or_Flu_int + 
##     Age + TNFalpha + IL17A + Vaccination_statusonly_CoV2 + IL4
## 
##                               Df Deviance    AIC
## - PC1_IgG                      1    17944 650.96
## - Vaccination_statusonly_CoV2  1    17977 651.10
## - NC_IgG                       1    18010 651.24
## - IL4                          1    18161 651.88
## - TNFalpha                     1    18208 652.07
## - SeverityCoV2_or_Flu_int      1    18299 652.45
## - PC1_IgA                      1    18312 652.51
## <none>                              17879 652.69
## - IL17A                        1    18724 654.20
## - Age                          1    20465 660.95
## 
## Step:  AIC=650.96
## AnnV_IgM ~ NC_IgG + PC1_IgA + SeverityCoV2_or_Flu_int + Age + 
##     TNFalpha + IL17A + Vaccination_statusonly_CoV2 + IL4
## 
##                               Df Deviance    AIC
## - Vaccination_statusonly_CoV2  1    17992 649.17
## - IL4                          1    18175 649.94
## - PC1_IgA                      1    18315 650.52
## - TNFalpha                     1    18340 650.62
## - SeverityCoV2_or_Flu_int      1    18365 650.73
## <none>                              17944 650.96
## - IL17A                        1    18979 653.23
## - NC_IgG                       1    19116 653.77
## - Age                          1    20562 659.31
## 
## Step:  AIC=649.17
## AnnV_IgM ~ NC_IgG + PC1_IgA + SeverityCoV2_or_Flu_int + Age + 
##     TNFalpha + IL17A + IL4
## 
##                           Df Deviance    AIC
## - IL4                      1    18212 648.09
## - TNFalpha                 1    18354 648.68
## - PC1_IgA                  1    18363 648.72
## <none>                          17992 649.17
## - SeverityCoV2_or_Flu_int  1    18765 650.36
## - IL17A                    1    18979 651.23
## - NC_IgG                   1    19185 652.05
## - Age                      1    20886 658.50
## 
## Step:  AIC=648.09
## AnnV_IgM ~ NC_IgG + PC1_IgA + SeverityCoV2_or_Flu_int + Age + 
##     TNFalpha + IL17A
## 
##                           Df Deviance    AIC
## - TNFalpha                 1    18424 646.97
## - PC1_IgA                  1    18566 647.55
## <none>                          18212 648.09
## - IL17A                    1    19160 649.95
## - SeverityCoV2_or_Flu_int  1    19324 650.59
## - NC_IgG                   1    19351 650.70
## - Age                      1    20991 656.88
## 
## Step:  AIC=646.97
## AnnV_IgM ~ NC_IgG + PC1_IgA + SeverityCoV2_or_Flu_int + Age + 
##     IL17A
## 
##                           Df Deviance    AIC
## - PC1_IgA                  1    18814 646.56
## <none>                          18424 646.97
## - IL17A                    1    19278 648.41
## - SeverityCoV2_or_Flu_int  1    19385 648.84
## - NC_IgG                   1    19452 649.10
## - Age                      1    21079 655.20
## 
## Step:  AIC=646.56
## AnnV_IgM ~ NC_IgG + SeverityCoV2_or_Flu_int + Age + IL17A
## 
##                           Df Deviance    AIC
## <none>                          18814 646.56
## - NC_IgG                   1    19466 647.15
## - IL17A                    1    19576 647.58
## - SeverityCoV2_or_Flu_int  1    19669 647.94
## - Age                      1    21471 654.60
#CL model

CL_IgG_model1 <- glm(CL_IgG ~ NC_IgG + Spike_IgG + PC1_IgG +
                        RBD_IgA + Spike_IgA + PC1_IgA +
                        IL8 + IL4 + MIP1alpha + SDF1alpha +
                        Age,
                      data = data_aPL_regr_plot_CL_IgG, 
                      family = gaussian(link = 'identity'))

CL_IgG_model1_AIC <- stepAIC(CL_IgG_model1, trace=TRUE)
## Start:  AIC=332.73
## CL_IgG ~ NC_IgG + Spike_IgG + PC1_IgG + RBD_IgA + Spike_IgA + 
##     PC1_IgA + IL8 + IL4 + MIP1alpha + SDF1alpha + Age
## 
##             Df Deviance    AIC
## - IL4        1   2405.9 330.76
## - PC1_IgA    1   2407.9 330.80
## - MIP1alpha  1   2434.2 331.29
## - RBD_IgA    1   2459.1 331.74
## - SDF1alpha  1   2491.9 332.34
## <none>           2404.2 332.73
## - Age        1   2578.0 333.87
## - IL8        1   2603.5 334.31
## - Spike_IgA  1   2693.1 335.83
## - NC_IgG     1   2714.7 336.19
## - Spike_IgG  1   2742.1 336.65
## - PC1_IgG    1   2866.3 338.64
## 
## Step:  AIC=330.76
## CL_IgG ~ NC_IgG + Spike_IgG + PC1_IgG + RBD_IgA + Spike_IgA + 
##     PC1_IgA + IL8 + MIP1alpha + SDF1alpha + Age
## 
##             Df Deviance    AIC
## - PC1_IgA    1   2410.0 328.84
## - MIP1alpha  1   2439.1 329.38
## - RBD_IgA    1   2463.6 329.83
## - SDF1alpha  1   2493.0 330.36
## <none>           2405.9 330.76
## - Age        1   2578.0 331.87
## - IL8        1   2603.6 332.31
## - Spike_IgA  1   2693.2 333.84
## - NC_IgG     1   2714.7 334.19
## - Spike_IgG  1   2742.9 334.66
## - PC1_IgG    1   2866.6 336.64
## 
## Step:  AIC=328.84
## CL_IgG ~ NC_IgG + Spike_IgG + PC1_IgG + RBD_IgA + Spike_IgA + 
##     IL8 + MIP1alpha + SDF1alpha + Age
## 
##             Df Deviance    AIC
## - MIP1alpha  1   2442.2 327.43
## - RBD_IgA    1   2488.4 328.28
## - SDF1alpha  1   2514.0 328.74
## <none>           2410.0 328.84
## - Age        1   2601.0 330.27
## - IL8        1   2605.1 330.34
## - NC_IgG     1   2728.7 332.43
## - Spike_IgG  1   2776.1 333.20
## - Spike_IgA  1   2803.5 333.64
## - PC1_IgG    1   2870.3 334.70
## 
## Step:  AIC=327.43
## CL_IgG ~ NC_IgG + Spike_IgG + PC1_IgG + RBD_IgA + Spike_IgA + 
##     IL8 + SDF1alpha + Age
## 
##             Df Deviance    AIC
## - RBD_IgA    1   2526.6 326.96
## <none>           2442.2 327.43
## - SDF1alpha  1   2581.6 327.93
## - Age        1   2615.9 328.53
## - NC_IgG     1   2799.0 331.57
## - Spike_IgG  1   2834.5 332.14
## - IL8        1   2848.9 332.37
## - PC1_IgG    1   2951.7 333.96
## - Spike_IgA  1   3014.3 334.91
## 
## Step:  AIC=326.96
## CL_IgG ~ NC_IgG + Spike_IgG + PC1_IgG + Spike_IgA + IL8 + SDF1alpha + 
##     Age
## 
##             Df Deviance    AIC
## <none>           2526.6 326.96
## - Age        1   2658.6 327.25
## - SDF1alpha  1   2697.7 327.91
## - NC_IgG     1   2862.1 330.57
## - IL8        1   2907.2 331.28
## - Spike_IgG  1   2944.6 331.85
## - Spike_IgA  1   3016.7 332.94
## - PC1_IgG    1   3074.6 333.80
# Data follow-up

prob_PT_IgM_model1 <- PT_IgM_model1 %>% predict(data_aPL_regr_plot_PT_IgM, type = "response")
prob_PT_IgM_model1_AIC <- PT_IgM_model1_AIC %>% predict(data_aPL_regr_plot_PT_IgM, type = "response")
prob_ß2GPI_IgM_model1 <- ß2GPI_IgM_model1 %>% predict(data_aPL_regr_plot_B2GPI_IgM, type = "response")
prob_ß2GPI_IgM_model1_AIC <- ß2GPI_IgM_model1_AIC %>% predict(data_aPL_regr_plot_B2GPI_IgM, type = "response")
prob_AnV_IgM_model1 <- AnV_IgM_model1 %>% predict(data_aPL_regr_plot_AnV_IgM, type = "response")
prob_AnV_IgM_model1_AIC <- AnV_IgM_model1_AIC %>% predict(data_aPL_regr_plot_AnV_IgM, type = "response")
prob_CL_IgG_model1 <- CL_IgG_model1 %>% predict(data_aPL_regr_plot_CL_IgG, type = "response")
prob_CL_IgG_model1_AIC <- CL_IgG_model1_AIC %>% predict(data_aPL_regr_plot_CL_IgG, type = "response")

data_aPL_regr_plot_PT_IgM$PT_IgM_model1_predicted_prob <- as.numeric(prob_PT_IgM_model1)
data_aPL_regr_plot_PT_IgM$PT_IgM_model1_AIC_predicted_prob <- as.numeric(prob_PT_IgM_model1_AIC)
data_aPL_regr_plot_B2GPI_IgM$ß2GPI_IgM_model1_predicted_prob <- as.numeric(prob_ß2GPI_IgM_model1)
data_aPL_regr_plot_B2GPI_IgM$ß2GPI_IgM_model1_AIC_predicted_prob <- as.numeric(prob_ß2GPI_IgM_model1_AIC)
data_aPL_regr_plot_AnV_IgM$AnV_IgM_model1_predicted_prob <- as.numeric(prob_AnV_IgM_model1)
data_aPL_regr_plot_AnV_IgM$AnV_IgM_model1_AIC_predicted_prob <- as.numeric(prob_AnV_IgM_model1_AIC)
data_aPL_regr_plot_CL_IgG$CL_IgG_model1_predicted_prob <- as.numeric(prob_CL_IgG_model1)
data_aPL_regr_plot_CL_IgG$CL_IgG_model1_AIC_predicted_prob <- as.numeric(prob_CL_IgG_model1_AIC)

# Combine

data_aPL_regr_plot_for_plotting_PT <- data_aPL_regr_plot_PT_IgM %>%
  dplyr::select(Unique_sample_ID_for_study, PT_IgM, PT_IgM_model1_predicted_prob, PT_IgM_model1_AIC_predicted_prob) %>%
  dplyr::mutate(Model='A.PT') %>%
  dplyr::rename(Original_value = PT_IgM) %>%
  dplyr::rename(Predicted_value = PT_IgM_model1_predicted_prob) %>%
  dplyr::rename(Predicted_value_AIC = PT_IgM_model1_AIC_predicted_prob)

data_aPL_regr_plot_for_plotting_ß2GPI <- data_aPL_regr_plot_B2GPI_IgM %>%
  dplyr::select(Unique_sample_ID_for_study, ß2GPI_IgM, ß2GPI_IgM_model1_predicted_prob, ß2GPI_IgM_model1_AIC_predicted_prob) %>%
  dplyr::mutate(Model='B.ß2GPI') %>%
  dplyr::rename(Original_value = ß2GPI_IgM) %>%
  dplyr::rename(Predicted_value = ß2GPI_IgM_model1_predicted_prob) %>%
  dplyr::rename(Predicted_value_AIC = ß2GPI_IgM_model1_AIC_predicted_prob)

data_aPL_regr_plot_for_plotting_AnV <- data_aPL_regr_plot_AnV_IgM %>%
  dplyr::select(Unique_sample_ID_for_study, AnnV_IgM, AnV_IgM_model1_predicted_prob, AnV_IgM_model1_AIC_predicted_prob) %>%
  dplyr::mutate(Model='C.AnV') %>%
  dplyr::rename(Original_value = AnnV_IgM) %>%
  dplyr::rename(Predicted_value = AnV_IgM_model1_predicted_prob) %>%
  dplyr::rename(Predicted_value_AIC = AnV_IgM_model1_AIC_predicted_prob)

data_aPL_regr_plot_for_plotting_CL <- data_aPL_regr_plot_CL_IgG %>%
  dplyr::select(Unique_sample_ID_for_study, CL_IgG, CL_IgG_model1_predicted_prob, CL_IgG_model1_AIC_predicted_prob) %>%
  dplyr::mutate(Model='D.CL') %>%
  dplyr::rename(Original_value = CL_IgG) %>%
  dplyr::rename(Predicted_value = CL_IgG_model1_predicted_prob) %>%
  dplyr::rename(Predicted_value_AIC = CL_IgG_model1_AIC_predicted_prob)

data_aPL_regr_plot_for_plotting <- data_aPL_regr %>%
  dplyr::select(Unique_sample_ID_for_study)

data_aPL_regr_plot_for_plotting = bind_rows(data_aPL_regr_plot_for_plotting_PT,
                                            data_aPL_regr_plot_for_plotting_ß2GPI,
                                            data_aPL_regr_plot_for_plotting_AnV,
                                            data_aPL_regr_plot_for_plotting_CL)
obs_vs_pred01 <-
ggplot(data = data_aPL_regr_plot_for_plotting, aes(x = Predicted_value, y = Original_value, color=Model)) +
  geom_pointrange(aes(ymin = Original_value, ymax = Original_value, xmin=Original_value, xmax=Original_value)) +
  geom_smooth(method = "glm", 
              method.args = list(family = gaussian()), colour='blue') +
  stat_smooth(method = "glm", 
              method.args = list(family = gaussian()), colour='blue', lty='dotted', geom='ribbon', fill=NA) +
  stat_cor(method='spearman') +
  geom_abline(slope = 1, intercept = 0, color="red", linetype='dashed') +
  facet_wrap(~Model,ncol=4) +
  theme(axis.text.y = element_text(size = 8)) +
  theme(legend.position="none") +
  labs(y="Observed aPL level",x="Predicted aPL level",title="Observed versus predicted aPL level for models")

obs_vs_pred02 <-
ggplot(data = data_aPL_regr_plot_for_plotting, aes(x = Predicted_value_AIC, y = Original_value, color=Model)) +
  geom_pointrange(aes(ymin = Original_value, ymax = Original_value, xmin=Original_value, xmax=Original_value)) +
  geom_smooth(method = "glm", 
              method.args = list(family = gaussian()), colour='blue') +
  stat_smooth(method = "glm", 
              method.args = list(family = gaussian()), colour='blue', lty='dotted', geom='ribbon', fill=NA) +
  stat_cor(method='spearman') +
  geom_abline(slope = 1, intercept = 0, color="red", linetype='dashed') +
  facet_wrap(~Model,ncol=4) +
  theme(axis.text.y = element_text(size = 8)) +
  theme(legend.position="none") +
  labs(y="Observed aPL level",x="Predicted aPL level",title="Observed versus predicted aPL level for AIC improved models")

To ensure the validity of the approach, and, particularly, the plotting, we double check the assumptions using a slightly different way of obtaining the visualisation.

ggscatter(data=data_aPL_regr_plot_for_plotting, 
  x = "Predicted_value", 
  y = "Original_value",
  color = "Model", 
  #palette = "jco",
  add = "reg.line",
  cor.method = 'spearman'
) +
  geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
  stat_cor(aes(color = Model),method='spearman') +
  labs(y="Original aPL level",x="Predicted aPL level",title="Original versus predicted aPL level for models") +
  theme_classic()

ggscatter(data=data_aPL_regr_plot_for_plotting, 
  x = "Predicted_value_AIC", 
  y = "Original_value",
  color = "Model", 
  #palette = "jco",
  add = "reg.line",
  cor.method = 'spearman'
) +
  geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
  stat_cor(aes(color = Model),method='spearman') +
  labs(y="Observed aPL level",x="Predicted aPL level",title="Observed versus predicted aPL level for AIC improved models") +
  theme_classic()

Observations:

  • The R and p-values obtained are perfectly matched.
  • As seen before,the regression lines necessarily go through the origin and have a slope of 1.
    • This is expected as the slopes and intercept of the observed values are, by definition, the same as the slopes and intercept of the predicted values, based on the observed values.
    • The new slopes and intercept are therefore normalised to b0=0 and slope=1.
    • However, this observation only holds true as long as we plot ALL values with which the initial model was built.
    • We thank Georg Meisl for verifying this assumption.

Additionally, we generate a non-informative model to visualise how the prediction then looks.

# PT model non-informative

PT_IgM_model_noninform <- glm(PT_IgM ~ Immunosuppressed_admission,
                     data = data_aPL_regr_plot_PT_IgM, 
                     family = gaussian(link = 'identity'))

# PT model age only
PT_IgM_model_age <- glm(PT_IgM ~ Age,
                     data = data_aPL_regr_plot_PT_IgM, 
                     family = gaussian(link = 'identity'))

# PT model sex only
PT_IgM_model_sex <- glm(PT_IgM ~ Sex,
                     data = data_aPL_regr_plot_PT_IgM, 
                     family = gaussian(link = 'identity'))


prob_PT_IgM_model_noninform <- PT_IgM_model_noninform %>% predict(data_aPL_regr_plot_PT_IgM, type = "response")
prob_PT_IgM_model_age <- PT_IgM_model_age %>% predict(data_aPL_regr_plot_PT_IgM, type = "response")
prob_PT_IgM_model_sex <- PT_IgM_model_sex %>% predict(data_aPL_regr_plot_PT_IgM, type = "response")


data_aPL_regr_plot_PT_IgM$PT_IgM_model1_predicted_prob_noninform <- as.numeric(prob_PT_IgM_model_noninform)
data_aPL_regr_plot_PT_IgM$PT_IgM_model1_predicted_prob_age <- as.numeric(prob_PT_IgM_model_age)
data_aPL_regr_plot_PT_IgM$PT_IgM_model1_predicted_prob_sex <- as.numeric(prob_PT_IgM_model_sex)

data_aPL_regr_plot_noninform_QC <- data_aPL_regr_plot_PT_IgM %>%
  dplyr::select(Unique_sample_ID_for_study, PT_IgM, PT_IgM_model1_predicted_prob_noninform) %>%
  dplyr::mutate(Model='A.Noninform') %>%
  dplyr::rename(Original_value = PT_IgM) %>%
  dplyr::rename(Predicted_value = PT_IgM_model1_predicted_prob_noninform)


data_aPL_regr_plot_age_QC <- data_aPL_regr_plot_PT_IgM %>%
  dplyr::select(Unique_sample_ID_for_study, PT_IgM, PT_IgM_model1_predicted_prob_age) %>%
  dplyr::mutate(Model='B.Age only') %>%
  dplyr::rename(Original_value = PT_IgM) %>%
  dplyr::rename(Predicted_value = PT_IgM_model1_predicted_prob_age)

data_aPL_regr_plot_diff_model_QC <- data_aPL_regr_plot_PT_IgM %>% #Here, we use the sample-matched IgA data, i.e. the data being used is not the one on which the regression had been performed. Hence, we expect both the slopes and the intercepts to differ among each other.
  dplyr::select(Unique_sample_ID_for_study, PT_IgA, PT_IgM_model1_predicted_prob_noninform) %>%
  dplyr::mutate(Model='C.Age only, PT IgA') %>%
  dplyr::rename(Original_value = PT_IgA) %>%
  dplyr::rename(Predicted_value = PT_IgM_model1_predicted_prob_noninform)

data_aPL_regr_plot_sex_QC <- data_aPL_regr_plot_PT_IgM %>%
  dplyr::select(Unique_sample_ID_for_study, PT_IgM, PT_IgM_model1_predicted_prob_sex) %>%
  dplyr::mutate(Model='D.Sex only') %>%
  dplyr::rename(Original_value = PT_IgM) %>%
  dplyr::rename(Predicted_value = PT_IgM_model1_predicted_prob_sex)

data_aPL_regr_plot_QC <- data_aPL_regr %>%
  dplyr::select(Unique_sample_ID_for_study)

data_aPL_regr_plot_QC = bind_rows(data_aPL_regr_plot_noninform_QC,
                                            data_aPL_regr_plot_age_QC, data_aPL_regr_plot_diff_model_QC,data_aPL_regr_plot_sex_QC)

obs_vs_pred03 <-
ggplot(data = data_aPL_regr_plot_QC, aes(x = Predicted_value, y = Original_value, color=Model)) +
  geom_pointrange(aes(ymin = Original_value, ymax = Original_value, xmin=Original_value, xmax=Original_value)) +
  geom_smooth(method = "glm", 
              method.args = list(family = gaussian()), colour='blue') +
  stat_smooth(method = "glm", 
              method.args = list(family = gaussian()), colour='blue', lty='dotted', geom='ribbon', fill=NA) +
  stat_cor(method='spearman') +
  geom_abline(slope = 1, intercept = 0, color="red", linetype='dashed') +
  facet_wrap(~Model,ncol=4) +
  theme(axis.text.y = element_text(size = 8)) +
  theme(legend.position="none") +
  labs(y="Observed aPL level",x="Predicted aPL level",title="Observed versus predicted aPL level for non-informative models")

ggarrange(obs_vs_pred01, obs_vs_pred02, obs_vs_pred03,
          labels = c("A", "B", 'C'),
          ncol = 1, nrow = 3)

Observations:

  • The models are unsuited to predict the observed aPL values, both
    • the uninformative (where, in regression, only ‘immunosuppressed at admission’ was chosen) model as well as
    • the model that includes only age.
  • The choice of parameters for modelling are thus important to obtain a model of valid predictive character.

Overall, we conclude that while not in general, aPL levels for PT IgM, B2GPI IgM, AnV IgM, and CL IgG are modulated by the strength of the antibody response against SARS-CoV-2 and further modulated by other factors, including age. These observations validate findings reported previously.

6 SEROCONVERSION

In our previous study, we have shown the presence of IgM aPL titres but titres for IgG were low and did not display a change between infected and non-infected individuals. To increase the scrutiny of these reports, we have undertaken the effort to characterise seroconversion, in IgM, IgG, and IgA aPL. We use multiple approaches to investigate the phenomenon.

6.1 We compare IgG, IgM, and IgA of the same individuals, time-point-matched

We group together all IgG, IgM, IgA measurements

compare_means(aPL_signal ~ Isotype,  data=subset(data_vertical, CoV2_type == 'Spike_IgG'),
              method = "wilcox", symnum.args=signifiance_stars, p.adjust.method = "BH")
## # A tibble: 3 × 8
##   .y.        group1 group2         p    p.adj p.format p.signif method  
##   <chr>      <chr>  <chr>      <dbl>    <dbl> <chr>    <chr>    <chr>   
## 1 aPL_signal IgG    IgM    6.79e- 32 6.8e- 32 <2e-16   ****     Wilcoxon
## 2 aPL_signal IgG    IgA    2.03e-236 6.1e-236 <2e-16   ****     Wilcoxon
## 3 aPL_signal IgM    IgA    3.97e- 73 5.9e- 73 <2e-16   ****     Wilcoxon

Observations:

  • We observe that mean IgM values are generally higher than mean IgG or IgA.
  • We observe that mean IgA values are generally higher than mean IgG.
  • All distributions are significantly different.
seroconv01 <-
subset(data_vertical, CoV2_type == 'Spike_IgG') %>%
  dplyr::arrange(Isotype) %>%
  ggboxplot(x = "Isotype", y = "aPL_signal",
    color = "Isotype", palette = c(col4), outlier.shape = NA, add = "mean",
    merge = TRUE
  ) +
    geom_point(shape=16, position=position_jitterdodge(jitter.width = 0.1), aes(color = Isotype)) +
    geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
    stat_summary(fun = mean, geom = "point", shape = 18, size = 3,
                 aes(group = Isotype), color = "darkred",
                 position = position_dodge(width = 0.8)) +
    stat_summary(fun = mean, colour = "red", 
                 position = position_dodge(width = 0.8),
                 geom = "text", vjust = -0.7, 
                 aes(label = round(..y.., digits = 1), group = Isotype)) +
    guides(x =  guide_axis(angle = 90)) +
    theme_classic()

6.2 We correlate the three isotypes against each other

ggscatter(
  data=subset(data_vertical, CoV2_type == 'Spike_IgG' | Isotype == 'IgG'), 
  x = "aPL_signal_IgG",
  y = "aPL_signal_IgM",
  palette = "jco",
  add = "reg.line",
  cor.method = 'spearman'
  ) +
  stat_cor(method='spearman') +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'

We notice that there is hardly any correlation.

ggscatter(
  data=subset(data_vertical, CoV2_type == 'Spike_IgG' | Isotype == 'IgG'), 
  x = "aPL_signal_IgG",
  y = "aPL_signal_IgA",
  palette = "jco",
  add = "reg.line",
  cor.method = 'spearman'
) +
  stat_cor(method='spearman') +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 420 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 420 rows containing non-finite values (`stat_cor()`).
## Warning: Removed 420 rows containing missing values (`geom_point()`).

We notice that there is hardly any correlation but slightly more than when IgM is involved.

ggscatter(
  data=subset(data_vertical, CoV2_type == 'Spike_IgG' | Isotype == 'IgG'), 
  x = "aPL_signal_IgM",
  y = "aPL_signal_IgA",
  palette = "jco",
  add = "reg.line",
  cor.method = 'spearman'
) +
  stat_cor(method='spearman') +
  theme_classic()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 420 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 420 rows containing non-finite values (`stat_cor()`).
## Warning: Removed 420 rows containing missing values (`geom_point()`).

We notice that there is hardly any correlation.

6.3 We plot the DPO with colour-coded IgG, IgA, and IgM

seroconv02 <-
   ggplot(data=subset(data_vertical, CoV2_type == 'Spike_IgG'), aes(x = DPOCoV2_or_FLU, y = aPL_signal, color=Isotype)) +
  geom_point() +
  geom_smooth(method = "glm", 
              method.args = list(family = gaussian()), aes(colour=Isotype)) +
  stat_smooth(method = "glm", 
              method.args = list(family = gaussian()), aes(colour=Isotype), lty='dotted', geom='ribbon', fill=NA) +
  stat_cor(method='spearman') +
  geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
  scale_fill_manual(values=c(col4)) +
  scale_color_manual(values=c(col4)) +
  theme(axis.text.y = element_text(size = 8)) +
  labs(y="aPL signal",x="DPO",title="")

Observations:

  • When observing the behaviour of levels based on DPO, i.e. restricting the analysis on individuals that had been infected and thus have a DPO, we see that there is hardly any trend, and clearly not a linear trend. Neither for IgM, nor for IgG or IgA.
  • Importantly, there is no evidence for seroconversion on the global scale.

6.3.0.1 5.4. We plot the DPO for previous and new data (i.e. we compare the two datasets) with colour-coded datasets and shapes for IgG, IgA, and IgM

ggscatter(
  data=subset(published_data_comparison,
              Isotype == 'IgG' | 
              Isotype == 'IgM' |
              Isotype == 'IgA'), 
  x = "DPOCoV2_or_FLU", 
  y = "aPL_signal",
  color = "dataset", 
  palette = "npg",
  cor.method = 'spearman'
  ) +
  geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
  facet_wrap(~Isotype,ncol=3) +
  theme_classic()
## Warning: Removed 2340 rows containing missing values (`geom_point()`).

Observations:

  • As seen before, we cover a larger range of DPO in the new dataset.
  • Also, as seen before, the old dataset had some ‘higher’ values, in general.
  • Overall, the distributions are comparable.

6.3.0.2 5.5. We look at different timepoints of the same individuals

##### We compare the two timepoints for IgG and IgM
compare_means(aPL_signal ~ Timepoint,  data=subset(data_vertical, CoV2_type == 'Spike_IgG'),
              method = "wilcox", group.by = 'Isotype', symnum.args=signifiance_stars, p.adjust.method = "BH")
## # A tibble: 3 × 9
##   Isotype .y.        group1 group2         p   p.adj p.format p.signif method  
##   <chr>   <chr>      <chr>  <chr>      <dbl>   <dbl> <chr>    <chr>    <chr>   
## 1 IgG     aPL_signal 1      2      0.893     0.89    0.8926   ns       Wilcoxon
## 2 IgM     aPL_signal 1      2      0.0000544 0.00016 5.4e-05  ***      Wilcoxon
## 3 IgA     aPL_signal 1      2      0.00517   0.0078  0.0052   *        Wilcoxon

Observations:

  • Globally, IgM values tend to increase at the second time point, but not IgG.
  • IgA follows the trend of IgM.
seroconv03 <-
  subset(data_vertical, CoV2_type == 'Spike_IgG') %>%
  dplyr::arrange(Isotype) %>%
  ggplot(aes(x = Isotype, y = aPL_signal)) +
    geom_boxplot(aes(color = as.factor(Timepoint)), outlier.shape = NA, add = "mean") +
    geom_point(shape=16, position=position_jitterdodge(jitter.width = 0.1), aes(color = as.factor(Timepoint))) +
    geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
    stat_summary(fun = mean, geom = "point", shape = 18, size = 3,
                 aes(group = interaction(Isotype, Timepoint)), color = "darkred",
                 position = position_dodge(width = 0.8)) +
    stat_summary(fun = mean, colour = "red", 
                 position = position_dodge(width = 0.8),
                 geom = "text", vjust = -0.7, 
                 aes(label = round(..y.., digits = 1), group = interaction(Isotype, Timepoint))) +
    scale_color_manual(values=c('black', 'darkgrey')) +
    theme_classic()
## Warning in geom_boxplot(aes(color = as.factor(Timepoint)), outlier.shape = NA, :
## Ignoring unknown parameters: `add`

6.4 We look at the distance between the two measurements, where available

We use a natural spline to model the behaviour of the DPO over time, individually for IgM, IgG, and IgA. See here

for what some of the parameters of the spline mean. We hope that the spline if the trend was nonlinear, e.g., would indicate such a behaviour. If the spline appears largely monotonous and linear, we will interpret it as such and in this case, we will observe the slope. If,then, there is an indication that the slope should be considered more closely, we will use a proper linear model to perform a linear regression on the data.

# We assess the percentiles in the data per isotype
data_vertical_tp %>%
  group_by(Isotype) %>%
  summarise(percent10 = quantile(Time_diff, probs = .1),
            percent25 = quantile(Time_diff, probs = .25),
            percent50 = quantile(Time_diff, probs = .5),
            percent75 = quantile(Time_diff, probs = .75),
            percent90 = quantile(Time_diff, probs = .9))
## # A tibble: 3 × 6
##   Isotype percent10 percent25 percent50 percent75 percent90
##   <chr>       <dbl>     <dbl>     <dbl>     <dbl>     <dbl>
## 1 IgA             0         0         0         0         8
## 2 IgG             0         0         0         0         8
## 3 IgM             0         0         0         0         8
# We assess the percentiles in the data regardless of isotype - they are all the same
data_vertical_tp %>%
  summarise(percent10 = quantile(Time_diff, probs = .1),
            percent25 = quantile(Time_diff, probs = .25),
            percent50 = quantile(Time_diff, probs = .5),
            percent75 = quantile(Time_diff, probs = .75),
            percent90 = quantile(Time_diff, probs = .9))
##   percent10 percent25 percent50 percent75 percent90
## 1         0         0         0         0         8
# Comment: the data is obviously highly shifted towards 0. For the spline, to give it flexibility, we set a knot at the 90th percentile, which, looking at the data, is justified.

# We fit the spline and plot the data

seroconv04 <-
ggscatter(
  data=data_vertical_tp, 
  x = "Time_diff", 
  y = "aPL_signal",
  shape = 'Isotype',
  cor.method = 'spearman'
) +
  geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
  geom_line(aes(group = interaction(Unique_patient_ID, aPL_type), color=interaction(Unique_patient_ID, aPL_type)), linetype='dashed') +
  geom_smooth(method ='lm', formula = y ~ splines::ns(x, knots=c(10), # We define an interior knot at the 90th percentile. 
                                                      Boundary.knots = range(x)), #= c(0,50)), i.e. We could set the boundary knots at the actual boundary...
              aes(fill=Isotype)) +
  theme_classic() +
  theme(legend.position="none")
seroconv04
## Warning: Removed 140 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 140 rows containing missing values (`geom_point()`).
## Warning: Removed 140 rows containing missing values (`geom_line()`).

ggarrange(seroconv01, seroconv02, seroconv03, seroconv04,
          labels = c("A", "B", 'C','D'),
          ncol = 2, nrow = 2)
## Warning: Removed 140 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 140 rows containing non-finite values (`stat_summary()`).
## Removed 140 rows containing non-finite values (`stat_summary()`).
## Removed 140 rows containing non-finite values (`stat_summary()`).
## Warning: Removed 140 rows containing missing values (`geom_point()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1680 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 1680 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 1680 rows containing non-finite values (`stat_cor()`).
## Warning: Removed 1680 rows containing missing values (`geom_point()`).
## Warning: Removed 140 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 140 rows containing non-finite values (`stat_summary()`).
## Removed 140 rows containing non-finite values (`stat_summary()`).
## Warning: Removed 140 rows containing missing values (`geom_point()`).
## Warning: Removed 140 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 140 rows containing missing values (`geom_point()`).
## Warning: Removed 140 rows containing missing values (`geom_line()`).

Here we look at matched patient samples with more than one timepoints. The dashed lines connect patients for a given aPL (e.g. PI) and isotype (e.g. IgA), i.e. PI_IgA, or PI_IgG, or PI_IgM, etc.

  • square, red spline: IgM
  • round. blue spline: IgA
  • triangle, green spline: IgG

Observations:

  • We cannot identify a trend towards higher values with increased DPO…
  • Overall, we do not see a trend for IgG seroconversion.
  • The titres seem remarkably stable over time.
  • The near-linear behaviour of the splines indicates that overall, the titres remained similar over time. If anything, after a small increase, the splines (IgA and IgM) decrease again with later time points. The regression on IgG is behaving as a flat line.

Conclusions:

Both when looking at all data together, or when looking at matched time points, there is no indication for seroconversion in the given dataset. On the contrary, IgM, IgG, and IgA aPL appear stable over time.

7 COMPARISON OF SARS-COV-2 INFECTED WITH VACCINATED AND WITH INFLUENZA

  • If we compare the aPL levels in SARS-CoV-2 infected individuals to another disease, here influenza, does it look generic or specific to SARS-CoV-2?
  • Is it infection with SARS-CoV-2 that elicits higher levels of aPL IgM, or equally so vaccination with mRNA vaccine?

7.1 We look at it in a generic manner - we compare all aPL together

### We conduct the statistics and save tables

# IgM

aPL_IgM_stats_together <-
compare_means(aPL_signal_IgM ~ COVID_vaccination_Group,  data=subset(data_vertical, CoV2_type == 'Spike_IgG' & Isotype == 'IgM'), method = "wilcox", symnum.args=signifiance_stars, p.adjust.method = "BH", ref.group = '01_Non-infected/non-vaccinated') %>%
  as.data.frame() %>%
  dplyr::arrange(p.adj)

# IgG
aPL_IgG_stats_together <-
compare_means(aPL_signal_IgG ~ COVID_vaccination_Group,  data=subset(data_vertical, CoV2_type == 'Spike_IgG' & Isotype == 'IgG'), method = "wilcox", symnum.args=signifiance_stars, p.adjust.method = "BH", ref.group = '01_Non-infected/non-vaccinated') %>%
  as.data.frame() %>%
  dplyr::arrange(p.adj)

# IgA
aPL_IgA_stats_together <-
compare_means(aPL_signal_IgA ~ COVID_vaccination_Group,  data=subset(data_vertical, CoV2_type == 'Spike_IgG' & Isotype == 'IgA'), method = "wilcox", symnum.args=signifiance_stars, p.adjust.method = "BH", ref.group = '01_Non-infected/non-vaccinated') %>%
  as.data.frame() %>%
  dplyr::arrange(p.adj)

###  We calculate the fold change from baseline (non-infected/non-vaccinated), for all isotypes separately

# IgM

data_vertical_summary_IgM_map <- data_vertical %>%
  dplyr::filter(CoV2_type == 'Spike_IgG' & Isotype == 'IgM') %>%
  dplyr::filter(COVID_vaccination_Group=='01_Non-infected/non-vaccinated') %>%
  dplyr::group_by(COVID_vaccination_Group) %>%
  dplyr::summarise(Median_baseline=median(aPL_signal_IgM),na.rm = TRUE) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(MAP = 'MAP')

data_vertical_summary_IgM <- data_vertical %>%
  dplyr::filter(CoV2_type == 'Spike_IgG' & Isotype == 'IgM') %>%
  dplyr::group_by(COVID_vaccination_Group) %>%
  dplyr::summarise(Mean=mean(aPL_signal_IgM), Max=max(aPL_signal_IgM), Min=min(aPL_signal_IgM), Median=median(aPL_signal_IgM), Std=sd(aPL_signal_IgM),Q1=quantile(aPL_signal_IgM, probs = 0.25), Q3=quantile(aPL_signal_IgM, probs = 0.75),na.rm = TRUE) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(MAP = 'MAP') %>%
  dplyr::left_join(data_vertical_summary_IgM_map, by = 'MAP') %>%
  dplyr::mutate(Median_change = round(Median/Median_baseline,2))

# IgG

data_vertical_summary_IgG_map <- data_vertical %>%
  dplyr::filter(CoV2_type == 'Spike_IgG' & Isotype == 'IgG') %>%
  dplyr::filter(COVID_vaccination_Group=='01_Non-infected/non-vaccinated') %>%
  dplyr::group_by(COVID_vaccination_Group) %>%
  dplyr::summarise(Median_baseline=median(aPL_signal_IgG),na.rm = TRUE) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(MAP = 'MAP')

data_vertical_summary_IgG <- data_vertical %>%
  dplyr::filter(CoV2_type == 'Spike_IgG' & Isotype == 'IgG') %>%
  dplyr::group_by(COVID_vaccination_Group) %>%
  dplyr::summarise(Mean=mean(aPL_signal_IgG), Max=max(aPL_signal_IgG), Min=min(aPL_signal_IgG), Median=median(aPL_signal_IgG), Std=sd(aPL_signal_IgG),Q1=quantile(aPL_signal_IgG, probs = 0.25), Q3=quantile(aPL_signal_IgG, probs = 0.75),na.rm = TRUE) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(MAP = 'MAP') %>%
  dplyr::left_join(data_vertical_summary_IgG_map, by = 'MAP') %>%
  dplyr::mutate(Median_change = round(Median/Median_baseline,2))

# IgA

data_vertical_summary_IgA_map <- data_vertical %>%
  dplyr::filter(CoV2_type == 'Spike_IgG' & Isotype == 'IgA') %>%
  dplyr::filter(COVID_vaccination_Group=='01_Non-infected/non-vaccinated') %>%
  dplyr::filter(!is.na(as.numeric(aPL_signal))) %>%
  dplyr::group_by(COVID_vaccination_Group) %>%
  dplyr::summarise(Median_baseline=median(aPL_signal_IgA),na.rm = TRUE) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(MAP = 'MAP')

data_vertical_summary_IgA <- data_vertical %>%
  dplyr::filter(CoV2_type == 'Spike_IgG' & Isotype == 'IgA') %>%
  dplyr::group_by(COVID_vaccination_Group) %>%
  dplyr::filter(!is.na(as.numeric(aPL_signal))) %>%
  dplyr::summarise(Mean=mean(aPL_signal_IgA), Max=max(aPL_signal_IgA), Min=min(aPL_signal_IgA), Median=median(aPL_signal_IgA), Std=sd(aPL_signal_IgA),Q1=quantile(aPL_signal_IgA, probs = 0.25), Q3=quantile(aPL_signal_IgA, probs = 0.75),na.rm = TRUE) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(MAP = 'MAP') %>%
  dplyr::left_join(data_vertical_summary_IgA_map, by = 'MAP') %>%
  dplyr::mutate(Median_change = round(Median/Median_baseline,2))
subset(data_vertical,CoV2_type == 'Spike_IgG') %>%
  dplyr::arrange(COVID_vaccination_Group) %>%
  ggboxplot(x = "COVID_vaccination_Group", y = "aPL_signal",
    color = "COVID_vaccination_Group", palette = "jco", outlier.shape = NA, add = "mean",
    merge = TRUE
  ) +
    geom_point(shape=16, position=position_jitterdodge(jitter.width = 0.1), aes(color = COVID_vaccination_Group)) +
    geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
    stat_summary(fun = mean, geom = "point", shape = 18, size = 3,
                 aes(group = COVID_vaccination_Group), color = "darkred",
                 position = position_dodge(width = 0.8)) +
    stat_summary(fun = mean, colour = "red", 
                 position = position_dodge(width = 0.8),
                 geom = "text", vjust = -0.7, 
                 aes(label = round(..y.., digits = 1), group = COVID_vaccination_Group)) +
    guides(x =  guide_axis(angle = 90)) +
    facet_wrap(~Isotype,ncol=3) +
    theme(axis.text.y = element_text(size = 8)) +
    labs(y="aPL level",x="Inf, vacc, infl",title="Infected versus vaccianted versus influenza")
## Warning: Removed 140 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 140 rows containing non-finite values (`stat_summary()`).
## Removed 140 rows containing non-finite values (`stat_summary()`).
## Removed 140 rows containing non-finite values (`stat_summary()`).
## Warning: Removed 140 rows containing missing values (`geom_point()`).

7.2 We do it for all individually

### We conduct the statistics and save tables

# IgM
aPL_IgM_stats <-
compare_means(aPL_signal_IgM ~ COVID_vaccination_Group,  data=subset(data_vertical, CoV2_type == 'Spike_IgG' & Isotype == 'IgM'), method = "wilcox", group.by = "Target", symnum.args=signifiance_stars, p.adjust.method = "BH", ref.group = '01_Non-infected/non-vaccinated') %>%
  as.data.frame() %>%
  dplyr::arrange(p.adj)

# IgG
aPL_IgG_stats <-
compare_means(aPL_signal_IgG ~ COVID_vaccination_Group,  data=subset(data_vertical, CoV2_type == 'Spike_IgG' & Isotype == 'IgG'), method = "wilcox", group.by = "Target", symnum.args=signifiance_stars, p.adjust.method = "BH", ref.group = '01_Non-infected/non-vaccinated') %>%
  as.data.frame() %>%
  dplyr::arrange(p.adj)

# IgA
aPL_IgA_stats <-
compare_means(aPL_signal_IgA ~ COVID_vaccination_Group,  data=subset(data_vertical, CoV2_type == 'Spike_IgG' & Isotype == 'IgA'), method = "wilcox", group.by = "Target", symnum.args=signifiance_stars, p.adjust.method = "BH", ref.group = '01_Non-infected/non-vaccinated') %>%
  as.data.frame() %>%
  dplyr::arrange(p.adj)

###  We calculate the fold change from baseline (non-infected/non-vaccinated), for all isotypes and targets separately

# IgM

data_vertical_summary_IgM_map_indiv <- data_vertical %>%
  dplyr::filter(CoV2_type == 'Spike_IgG' & Isotype == 'IgM') %>%
  dplyr::filter(COVID_vaccination_Group=='01_Non-infected/non-vaccinated') %>%
  dplyr::group_by(COVID_vaccination_Group, Target) %>%
  dplyr::summarise(Median_baseline=median(aPL_signal_IgM)) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(MAP = 'MAP')
## `summarise()` has grouped output by 'COVID_vaccination_Group'. You can override
## using the `.groups` argument.
data_vertical_summary_IgM_indiv <- data_vertical %>%
  dplyr::filter(CoV2_type == 'Spike_IgG' & Isotype == 'IgM') %>%
  dplyr::group_by(COVID_vaccination_Group, Target) %>%
  dplyr::summarise(Mean=mean(aPL_signal_IgM), Max=max(aPL_signal_IgM), Min=min(aPL_signal_IgM), Median=median(aPL_signal_IgM), Std=sd(aPL_signal_IgM),Q1=quantile(aPL_signal_IgM, probs = 0.25), Q3=quantile(aPL_signal_IgM, probs = 0.75)) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(MAP = 'MAP') %>%
  dplyr::left_join(data_vertical_summary_IgM_map_indiv, by = 'Target') %>%
  dplyr::mutate(Median_change = round(Median/Median_baseline,2))
## `summarise()` has grouped output by 'COVID_vaccination_Group'. You can override
## using the `.groups` argument.
# IgG

data_vertical_summary_IgG_map_indiv <- data_vertical %>%
  dplyr::filter(CoV2_type == 'Spike_IgG' & Isotype == 'IgG') %>%
  dplyr::filter(COVID_vaccination_Group=='01_Non-infected/non-vaccinated') %>%
  dplyr::group_by(COVID_vaccination_Group, Target) %>%
  dplyr::summarise(Median_baseline=median(aPL_signal_IgG)) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(MAP = 'MAP')
## `summarise()` has grouped output by 'COVID_vaccination_Group'. You can override
## using the `.groups` argument.
data_vertical_summary_IgG_indiv <- data_vertical %>%
  dplyr::filter(CoV2_type == 'Spike_IgG' & Isotype == 'IgG') %>%
  dplyr::group_by(COVID_vaccination_Group, Target) %>%
  dplyr::summarise(Mean=mean(aPL_signal_IgG), Max=max(aPL_signal_IgG), Min=min(aPL_signal_IgG), Median=median(aPL_signal_IgG), Std=sd(aPL_signal_IgG),Q1=quantile(aPL_signal_IgG, probs = 0.25), Q3=quantile(aPL_signal_IgG, probs = 0.75)) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(MAP = 'MAP') %>%
  dplyr::left_join(data_vertical_summary_IgG_map_indiv, by = 'Target') %>%
  dplyr::mutate(Median_change = round(Median/Median_baseline,2))
## `summarise()` has grouped output by 'COVID_vaccination_Group'. You can override
## using the `.groups` argument.
# IgA

data_vertical_summary_IgA_map_indiv <- data_vertical %>%
  dplyr::filter(CoV2_type == 'Spike_IgG' & Isotype == 'IgA') %>%
  dplyr::filter(COVID_vaccination_Group=='01_Non-infected/non-vaccinated') %>%
  dplyr::filter(!is.na(as.numeric(aPL_signal))) %>%
  dplyr::group_by(COVID_vaccination_Group, Target) %>%
  dplyr::summarise(Median_baseline=median(aPL_signal_IgA)) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(MAP = 'MAP')
## `summarise()` has grouped output by 'COVID_vaccination_Group'. You can override
## using the `.groups` argument.
data_vertical_summary_IgA_indiv <- data_vertical %>%
  dplyr::filter(CoV2_type == 'Spike_IgG' & Isotype == 'IgA') %>%
  dplyr::filter(!is.na(as.numeric(aPL_signal))) %>%
  dplyr::group_by(COVID_vaccination_Group, Target) %>%
  dplyr::summarise(Mean=mean(aPL_signal_IgA), Max=max(aPL_signal_IgA), Min=min(aPL_signal_IgA), Median=median(aPL_signal_IgA), Std=sd(aPL_signal_IgA),Q1=quantile(aPL_signal_IgA, probs = 0.25), Q3=quantile(aPL_signal_IgA, probs = 0.75)) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(MAP = 'MAP') %>%
  dplyr::left_join(data_vertical_summary_IgA_map_indiv, by = 'Target') %>%
  dplyr::mutate(Median_change = round(Median/Median_baseline,2))
## `summarise()` has grouped output by 'COVID_vaccination_Group'. You can override
## using the `.groups` argument.
subset(data_vertical,CoV2_type == 'Spike_IgG') %>%
  dplyr::arrange(COVID_vaccination_Group) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("CL"), "A.CL")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("PA"), "B.PA")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("PC"), "C.PC")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("PE"), "D.PE")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("PG"), "E.PG")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("PL"), "F.PI")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("PS"), "G.PS")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("AnnV"), "H.AnV")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("ß2GPI"), "I.ß2GPI")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("PT"), "J.PT")) %>%
  ggboxplot(x = "COVID_vaccination_Group", y = "aPL_signal",
    color = "COVID_vaccination_Group", palette = "jco", outlier.shape = NA, add = "mean",
    merge = TRUE
  ) +
    geom_point(shape=16, position=position_jitterdodge(jitter.width = 0.1), aes(color = COVID_vaccination_Group)) +
    geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
    #stat_summary(fun = mean, geom = "point", shape = 18, size = 3,
    #             aes(group = COVID_vaccination_Group), color = "darkred",
    #             position = position_dodge(width = 0.8)) +
    #stat_summary(fun = mean, colour = "red", 
    #             position = position_dodge(width = 0.8),
    #             geom = "text", vjust = -0.7, 
    #             aes(label = round(..y.., digits = 1), group = COVID_vaccination_Group)) +
    guides(x =  guide_axis(angle = 90)) +
    facet_grid(rows=vars(Isotype), cols= vars(Target), scales='free_y', switch='y') +
    theme(axis.text.y = element_text(size = 8)) +
    labs(y="aPL level",x="Inf, vacc, infl",title="Infected versus vaccianted versus influenza")
## Warning: Removed 140 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 140 rows containing non-finite values (`stat_summary()`).
## Warning: Removed 140 rows containing missing values (`geom_point()`).

7.3 We do it specifically for PT_IgM, B2GPI_IgM, AnV_IgM

# PT IgM
compare_means(aPL_signal_IgM ~ COVID_vaccination_Group,  data=subset(data_vertical, CoV2_type == 'Spike_IgG', aPL_type='PT_IgM'), method = "wilcox", symnum.args=signifiance_stars, p.adjust.method = "BH") %>%
  as.data.frame() %>%
  dplyr::arrange(p.adj)
##               .y.                         group1
## 1  aPL_signal_IgM     02_Infected/non-vaccinated
## 2  aPL_signal_IgM     02_Infected/non-vaccinated
## 3  aPL_signal_IgM     03_Non-infected/vaccinated
## 4  aPL_signal_IgM     02_Infected/non-vaccinated
## 5  aPL_signal_IgM     03_Non-infected/vaccinated
## 6  aPL_signal_IgM     03_Non-infected/vaccinated
## 7  aPL_signal_IgM     03_Non-infected/vaccinated
## 8  aPL_signal_IgM         04_Infected/vaccinated
## 9  aPL_signal_IgM         04_Infected/vaccinated
## 10 aPL_signal_IgM 01_Non-infected/non-vaccinated
##                                      group2            p   p.adj p.format
## 1            01_Non-infected/non-vaccinated 3.847755e-30 3.8e-29  < 2e-16
## 2  00_Influenza_Non-infected/non-vaccinated 7.481763e-12 3.7e-11  7.5e-12
## 3            01_Non-infected/non-vaccinated 6.785810e-11 2.3e-10  6.8e-11
## 4                    04_Infected/vaccinated 3.880197e-10 9.7e-10  3.9e-10
## 5                02_Infected/non-vaccinated 4.006709e-05 8.0e-05  4.0e-05
## 6  00_Influenza_Non-infected/non-vaccinated 5.799304e-05 9.7e-05  5.8e-05
## 7                    04_Infected/vaccinated 2.371708e-03 3.4e-03   0.0024
## 8            01_Non-infected/non-vaccinated 3.093236e-02 3.9e-02   0.0309
## 9  00_Influenza_Non-infected/non-vaccinated 3.288277e-01 3.7e-01   0.3288
## 10 00_Influenza_Non-infected/non-vaccinated 5.539233e-01 5.5e-01   0.5539
##    p.signif   method
## 1      **** Wilcoxon
## 2      **** Wilcoxon
## 3      **** Wilcoxon
## 4      **** Wilcoxon
## 5       *** Wilcoxon
## 6       *** Wilcoxon
## 7         * Wilcoxon
## 8        ns Wilcoxon
## 9        ns Wilcoxon
## 10       ns Wilcoxon
# PT IgG
compare_means(aPL_signal_IgG ~ COVID_vaccination_Group,  data=subset(data_vertical, CoV2_type == 'Spike_IgG'),aPL_type='PT_IgG', method = "wilcox", symnum.args=signifiance_stars, p.adjust.method = "BH") %>%
  as.data.frame() %>%
  dplyr::arrange(p.adj)
##               .y.                         group1
## 1  aPL_signal_IgG     03_Non-infected/vaccinated
## 2  aPL_signal_IgG     02_Infected/non-vaccinated
## 3  aPL_signal_IgG     03_Non-infected/vaccinated
## 4  aPL_signal_IgG     02_Infected/non-vaccinated
## 5  aPL_signal_IgG     03_Non-infected/vaccinated
## 6  aPL_signal_IgG     02_Infected/non-vaccinated
## 7  aPL_signal_IgG         04_Infected/vaccinated
## 8  aPL_signal_IgG         04_Infected/vaccinated
## 9  aPL_signal_IgG     03_Non-infected/vaccinated
## 10 aPL_signal_IgG 01_Non-infected/non-vaccinated
##                                      group2            p   p.adj p.format
## 1            01_Non-infected/non-vaccinated 4.034598e-14 4.0e-13  4.0e-14
## 2            01_Non-infected/non-vaccinated 1.362165e-13 6.8e-13  1.4e-13
## 3  00_Influenza_Non-infected/non-vaccinated 1.522152e-11 5.1e-11  1.5e-11
## 4  00_Influenza_Non-infected/non-vaccinated 2.179669e-10 5.4e-10  2.2e-10
## 5                    04_Infected/vaccinated 2.197494e-05 4.4e-05  2.2e-05
## 6                    04_Infected/vaccinated 2.390369e-04 4.0e-04  0.00024
## 7  00_Influenza_Non-infected/non-vaccinated 1.120462e-02 1.6e-02  0.01120
## 8            01_Non-infected/non-vaccinated 6.417790e-02 8.0e-02  0.06418
## 9                02_Infected/non-vaccinated 1.057361e-01 1.2e-01  0.10574
## 10 00_Influenza_Non-infected/non-vaccinated 2.448137e-01 2.4e-01  0.24481
##    p.signif   method
## 1      **** Wilcoxon
## 2      **** Wilcoxon
## 3      **** Wilcoxon
## 4      **** Wilcoxon
## 5       *** Wilcoxon
## 6        ** Wilcoxon
## 7        ns Wilcoxon
## 8        ns Wilcoxon
## 9        ns Wilcoxon
## 10       ns Wilcoxon
# PT IgA
compare_means(aPL_signal_IgA ~ COVID_vaccination_Group,  data=subset(data_vertical, CoV2_type == 'Spike_IgG'), aPL_type='PT_IgA', method = "wilcox", symnum.args=signifiance_stars, p.adjust.method = "BH") %>%
  as.data.frame() %>%
  dplyr::arrange(p.adj)
##               .y.                         group1
## 1  aPL_signal_IgA     03_Non-infected/vaccinated
## 2  aPL_signal_IgA     02_Infected/non-vaccinated
## 3  aPL_signal_IgA     03_Non-infected/vaccinated
## 4  aPL_signal_IgA     03_Non-infected/vaccinated
## 5  aPL_signal_IgA     03_Non-infected/vaccinated
## 6  aPL_signal_IgA         04_Infected/vaccinated
## 7  aPL_signal_IgA         04_Infected/vaccinated
## 8  aPL_signal_IgA     02_Infected/non-vaccinated
## 9  aPL_signal_IgA     02_Infected/non-vaccinated
## 10 aPL_signal_IgA 01_Non-infected/non-vaccinated
##                                      group2            p   p.adj p.format
## 1                    04_Infected/vaccinated 6.727498e-44 6.7e-43  < 2e-16
## 2                    04_Infected/vaccinated 3.410657e-27 1.7e-26  < 2e-16
## 3                02_Infected/non-vaccinated 5.590315e-22 1.9e-21  < 2e-16
## 4  00_Influenza_Non-infected/non-vaccinated 5.285688e-20 1.3e-19  < 2e-16
## 5            01_Non-infected/non-vaccinated 1.222192e-14 2.4e-14  1.2e-14
## 6  00_Influenza_Non-infected/non-vaccinated 5.583016e-13 9.3e-13  5.6e-13
## 7            01_Non-infected/non-vaccinated 8.059492e-05 1.2e-04  8.1e-05
## 8  00_Influenza_Non-infected/non-vaccinated 2.393623e-03 3.0e-03   0.0024
## 9            01_Non-infected/non-vaccinated 2.356272e-01 2.6e-01   0.2356
## 10 00_Influenza_Non-infected/non-vaccinated 4.129146e-01 4.1e-01   0.4129
##    p.signif   method
## 1      **** Wilcoxon
## 2      **** Wilcoxon
## 3      **** Wilcoxon
## 4      **** Wilcoxon
## 5      **** Wilcoxon
## 6      **** Wilcoxon
## 7       *** Wilcoxon
## 8         * Wilcoxon
## 9        ns Wilcoxon
## 10       ns Wilcoxon
# B2GPI IgM
compare_means(aPL_signal_IgM ~ COVID_vaccination_Group,  data=subset(data_vertical, CoV2_type == 'Spike_IgG', aPL_type='ß2GPI_IgM'), method = "wilcox", symnum.args=signifiance_stars, p.adjust.method = "BH") %>%
  as.data.frame() %>%
  dplyr::arrange(p.adj)
##               .y.                         group1
## 1  aPL_signal_IgM     02_Infected/non-vaccinated
## 2  aPL_signal_IgM     02_Infected/non-vaccinated
## 3  aPL_signal_IgM     03_Non-infected/vaccinated
## 4  aPL_signal_IgM     02_Infected/non-vaccinated
## 5  aPL_signal_IgM     03_Non-infected/vaccinated
## 6  aPL_signal_IgM     03_Non-infected/vaccinated
## 7  aPL_signal_IgM     03_Non-infected/vaccinated
## 8  aPL_signal_IgM         04_Infected/vaccinated
## 9  aPL_signal_IgM         04_Infected/vaccinated
## 10 aPL_signal_IgM 01_Non-infected/non-vaccinated
##                                      group2            p   p.adj p.format
## 1            01_Non-infected/non-vaccinated 3.847755e-30 3.8e-29  < 2e-16
## 2  00_Influenza_Non-infected/non-vaccinated 7.481763e-12 3.7e-11  7.5e-12
## 3            01_Non-infected/non-vaccinated 6.785810e-11 2.3e-10  6.8e-11
## 4                    04_Infected/vaccinated 3.880197e-10 9.7e-10  3.9e-10
## 5                02_Infected/non-vaccinated 4.006709e-05 8.0e-05  4.0e-05
## 6  00_Influenza_Non-infected/non-vaccinated 5.799304e-05 9.7e-05  5.8e-05
## 7                    04_Infected/vaccinated 2.371708e-03 3.4e-03   0.0024
## 8            01_Non-infected/non-vaccinated 3.093236e-02 3.9e-02   0.0309
## 9  00_Influenza_Non-infected/non-vaccinated 3.288277e-01 3.7e-01   0.3288
## 10 00_Influenza_Non-infected/non-vaccinated 5.539233e-01 5.5e-01   0.5539
##    p.signif   method
## 1      **** Wilcoxon
## 2      **** Wilcoxon
## 3      **** Wilcoxon
## 4      **** Wilcoxon
## 5       *** Wilcoxon
## 6       *** Wilcoxon
## 7         * Wilcoxon
## 8        ns Wilcoxon
## 9        ns Wilcoxon
## 10       ns Wilcoxon
# B2GPI IgG
compare_means(aPL_signal_IgG ~ COVID_vaccination_Group,  data=subset(data_vertical, CoV2_type == 'Spike_IgG'),aPL_type='ß2GPI_IgG', method = "wilcox", symnum.args=signifiance_stars, p.adjust.method = "BH") %>%
  as.data.frame() %>%
  dplyr::arrange(p.adj)
##               .y.                         group1
## 1  aPL_signal_IgG     03_Non-infected/vaccinated
## 2  aPL_signal_IgG     02_Infected/non-vaccinated
## 3  aPL_signal_IgG     03_Non-infected/vaccinated
## 4  aPL_signal_IgG     02_Infected/non-vaccinated
## 5  aPL_signal_IgG     03_Non-infected/vaccinated
## 6  aPL_signal_IgG     02_Infected/non-vaccinated
## 7  aPL_signal_IgG         04_Infected/vaccinated
## 8  aPL_signal_IgG         04_Infected/vaccinated
## 9  aPL_signal_IgG     03_Non-infected/vaccinated
## 10 aPL_signal_IgG 01_Non-infected/non-vaccinated
##                                      group2            p   p.adj p.format
## 1            01_Non-infected/non-vaccinated 4.034598e-14 4.0e-13  4.0e-14
## 2            01_Non-infected/non-vaccinated 1.362165e-13 6.8e-13  1.4e-13
## 3  00_Influenza_Non-infected/non-vaccinated 1.522152e-11 5.1e-11  1.5e-11
## 4  00_Influenza_Non-infected/non-vaccinated 2.179669e-10 5.4e-10  2.2e-10
## 5                    04_Infected/vaccinated 2.197494e-05 4.4e-05  2.2e-05
## 6                    04_Infected/vaccinated 2.390369e-04 4.0e-04  0.00024
## 7  00_Influenza_Non-infected/non-vaccinated 1.120462e-02 1.6e-02  0.01120
## 8            01_Non-infected/non-vaccinated 6.417790e-02 8.0e-02  0.06418
## 9                02_Infected/non-vaccinated 1.057361e-01 1.2e-01  0.10574
## 10 00_Influenza_Non-infected/non-vaccinated 2.448137e-01 2.4e-01  0.24481
##    p.signif   method
## 1      **** Wilcoxon
## 2      **** Wilcoxon
## 3      **** Wilcoxon
## 4      **** Wilcoxon
## 5       *** Wilcoxon
## 6        ** Wilcoxon
## 7        ns Wilcoxon
## 8        ns Wilcoxon
## 9        ns Wilcoxon
## 10       ns Wilcoxon
# B2GPI IgA
compare_means(aPL_signal_IgA ~ COVID_vaccination_Group,  data=subset(data_vertical, CoV2_type == 'Spike_IgG'), aPL_type='ß2GPI_IgA', method = "wilcox", symnum.args=signifiance_stars, p.adjust.method = "BH") %>%
  as.data.frame() %>%
  dplyr::arrange(p.adj)
##               .y.                         group1
## 1  aPL_signal_IgA     03_Non-infected/vaccinated
## 2  aPL_signal_IgA     02_Infected/non-vaccinated
## 3  aPL_signal_IgA     03_Non-infected/vaccinated
## 4  aPL_signal_IgA     03_Non-infected/vaccinated
## 5  aPL_signal_IgA     03_Non-infected/vaccinated
## 6  aPL_signal_IgA         04_Infected/vaccinated
## 7  aPL_signal_IgA         04_Infected/vaccinated
## 8  aPL_signal_IgA     02_Infected/non-vaccinated
## 9  aPL_signal_IgA     02_Infected/non-vaccinated
## 10 aPL_signal_IgA 01_Non-infected/non-vaccinated
##                                      group2            p   p.adj p.format
## 1                    04_Infected/vaccinated 6.727498e-44 6.7e-43  < 2e-16
## 2                    04_Infected/vaccinated 3.410657e-27 1.7e-26  < 2e-16
## 3                02_Infected/non-vaccinated 5.590315e-22 1.9e-21  < 2e-16
## 4  00_Influenza_Non-infected/non-vaccinated 5.285688e-20 1.3e-19  < 2e-16
## 5            01_Non-infected/non-vaccinated 1.222192e-14 2.4e-14  1.2e-14
## 6  00_Influenza_Non-infected/non-vaccinated 5.583016e-13 9.3e-13  5.6e-13
## 7            01_Non-infected/non-vaccinated 8.059492e-05 1.2e-04  8.1e-05
## 8  00_Influenza_Non-infected/non-vaccinated 2.393623e-03 3.0e-03   0.0024
## 9            01_Non-infected/non-vaccinated 2.356272e-01 2.6e-01   0.2356
## 10 00_Influenza_Non-infected/non-vaccinated 4.129146e-01 4.1e-01   0.4129
##    p.signif   method
## 1      **** Wilcoxon
## 2      **** Wilcoxon
## 3      **** Wilcoxon
## 4      **** Wilcoxon
## 5      **** Wilcoxon
## 6      **** Wilcoxon
## 7       *** Wilcoxon
## 8         * Wilcoxon
## 9        ns Wilcoxon
## 10       ns Wilcoxon
# AnV IgM
compare_means(aPL_signal_IgM ~ COVID_vaccination_Group,  data=subset(data_vertical, CoV2_type == 'Spike_IgG', aPL_type='AnnV_IgM'), method = "wilcox", symnum.args=signifiance_stars, p.adjust.method = "BH") %>%
  as.data.frame() %>%
  dplyr::arrange(p.adj)
##               .y.                         group1
## 1  aPL_signal_IgM     02_Infected/non-vaccinated
## 2  aPL_signal_IgM     02_Infected/non-vaccinated
## 3  aPL_signal_IgM     03_Non-infected/vaccinated
## 4  aPL_signal_IgM     02_Infected/non-vaccinated
## 5  aPL_signal_IgM     03_Non-infected/vaccinated
## 6  aPL_signal_IgM     03_Non-infected/vaccinated
## 7  aPL_signal_IgM     03_Non-infected/vaccinated
## 8  aPL_signal_IgM         04_Infected/vaccinated
## 9  aPL_signal_IgM         04_Infected/vaccinated
## 10 aPL_signal_IgM 01_Non-infected/non-vaccinated
##                                      group2            p   p.adj p.format
## 1            01_Non-infected/non-vaccinated 3.847755e-30 3.8e-29  < 2e-16
## 2  00_Influenza_Non-infected/non-vaccinated 7.481763e-12 3.7e-11  7.5e-12
## 3            01_Non-infected/non-vaccinated 6.785810e-11 2.3e-10  6.8e-11
## 4                    04_Infected/vaccinated 3.880197e-10 9.7e-10  3.9e-10
## 5                02_Infected/non-vaccinated 4.006709e-05 8.0e-05  4.0e-05
## 6  00_Influenza_Non-infected/non-vaccinated 5.799304e-05 9.7e-05  5.8e-05
## 7                    04_Infected/vaccinated 2.371708e-03 3.4e-03   0.0024
## 8            01_Non-infected/non-vaccinated 3.093236e-02 3.9e-02   0.0309
## 9  00_Influenza_Non-infected/non-vaccinated 3.288277e-01 3.7e-01   0.3288
## 10 00_Influenza_Non-infected/non-vaccinated 5.539233e-01 5.5e-01   0.5539
##    p.signif   method
## 1      **** Wilcoxon
## 2      **** Wilcoxon
## 3      **** Wilcoxon
## 4      **** Wilcoxon
## 5       *** Wilcoxon
## 6       *** Wilcoxon
## 7         * Wilcoxon
## 8        ns Wilcoxon
## 9        ns Wilcoxon
## 10       ns Wilcoxon
# AnV IgG
compare_means(aPL_signal_IgG ~ COVID_vaccination_Group,  data=subset(data_vertical, CoV2_type == 'Spike_IgG'),aPL_type='AnnV_IgG', method = "wilcox", symnum.args=signifiance_stars, p.adjust.method = "BH") %>%
  as.data.frame() %>%
  dplyr::arrange(p.adj)
##               .y.                         group1
## 1  aPL_signal_IgG     03_Non-infected/vaccinated
## 2  aPL_signal_IgG     02_Infected/non-vaccinated
## 3  aPL_signal_IgG     03_Non-infected/vaccinated
## 4  aPL_signal_IgG     02_Infected/non-vaccinated
## 5  aPL_signal_IgG     03_Non-infected/vaccinated
## 6  aPL_signal_IgG     02_Infected/non-vaccinated
## 7  aPL_signal_IgG         04_Infected/vaccinated
## 8  aPL_signal_IgG         04_Infected/vaccinated
## 9  aPL_signal_IgG     03_Non-infected/vaccinated
## 10 aPL_signal_IgG 01_Non-infected/non-vaccinated
##                                      group2            p   p.adj p.format
## 1            01_Non-infected/non-vaccinated 4.034598e-14 4.0e-13  4.0e-14
## 2            01_Non-infected/non-vaccinated 1.362165e-13 6.8e-13  1.4e-13
## 3  00_Influenza_Non-infected/non-vaccinated 1.522152e-11 5.1e-11  1.5e-11
## 4  00_Influenza_Non-infected/non-vaccinated 2.179669e-10 5.4e-10  2.2e-10
## 5                    04_Infected/vaccinated 2.197494e-05 4.4e-05  2.2e-05
## 6                    04_Infected/vaccinated 2.390369e-04 4.0e-04  0.00024
## 7  00_Influenza_Non-infected/non-vaccinated 1.120462e-02 1.6e-02  0.01120
## 8            01_Non-infected/non-vaccinated 6.417790e-02 8.0e-02  0.06418
## 9                02_Infected/non-vaccinated 1.057361e-01 1.2e-01  0.10574
## 10 00_Influenza_Non-infected/non-vaccinated 2.448137e-01 2.4e-01  0.24481
##    p.signif   method
## 1      **** Wilcoxon
## 2      **** Wilcoxon
## 3      **** Wilcoxon
## 4      **** Wilcoxon
## 5       *** Wilcoxon
## 6        ** Wilcoxon
## 7        ns Wilcoxon
## 8        ns Wilcoxon
## 9        ns Wilcoxon
## 10       ns Wilcoxon
# AnV IgA
compare_means(aPL_signal_IgA ~ COVID_vaccination_Group,  data=subset(data_vertical, CoV2_type == 'Spike_IgG'), aPL_type='AnnV_IgA', method = "wilcox", symnum.args=signifiance_stars, p.adjust.method = "BH") %>%
  as.data.frame() %>%
  dplyr::arrange(p.adj)
##               .y.                         group1
## 1  aPL_signal_IgA     03_Non-infected/vaccinated
## 2  aPL_signal_IgA     02_Infected/non-vaccinated
## 3  aPL_signal_IgA     03_Non-infected/vaccinated
## 4  aPL_signal_IgA     03_Non-infected/vaccinated
## 5  aPL_signal_IgA     03_Non-infected/vaccinated
## 6  aPL_signal_IgA         04_Infected/vaccinated
## 7  aPL_signal_IgA         04_Infected/vaccinated
## 8  aPL_signal_IgA     02_Infected/non-vaccinated
## 9  aPL_signal_IgA     02_Infected/non-vaccinated
## 10 aPL_signal_IgA 01_Non-infected/non-vaccinated
##                                      group2            p   p.adj p.format
## 1                    04_Infected/vaccinated 6.727498e-44 6.7e-43  < 2e-16
## 2                    04_Infected/vaccinated 3.410657e-27 1.7e-26  < 2e-16
## 3                02_Infected/non-vaccinated 5.590315e-22 1.9e-21  < 2e-16
## 4  00_Influenza_Non-infected/non-vaccinated 5.285688e-20 1.3e-19  < 2e-16
## 5            01_Non-infected/non-vaccinated 1.222192e-14 2.4e-14  1.2e-14
## 6  00_Influenza_Non-infected/non-vaccinated 5.583016e-13 9.3e-13  5.6e-13
## 7            01_Non-infected/non-vaccinated 8.059492e-05 1.2e-04  8.1e-05
## 8  00_Influenza_Non-infected/non-vaccinated 2.393623e-03 3.0e-03   0.0024
## 9            01_Non-infected/non-vaccinated 2.356272e-01 2.6e-01   0.2356
## 10 00_Influenza_Non-infected/non-vaccinated 4.129146e-01 4.1e-01   0.4129
##    p.signif   method
## 1      **** Wilcoxon
## 2      **** Wilcoxon
## 3      **** Wilcoxon
## 4      **** Wilcoxon
## 5      **** Wilcoxon
## 6      **** Wilcoxon
## 7       *** Wilcoxon
## 8         * Wilcoxon
## 9        ns Wilcoxon
## 10       ns Wilcoxon
subset(data_vertical, 
              CoV2_type == 'Spike_IgG' &
              Target == 'PT' | 
              Target == 'ß2GPI' |
              Target == 'AnnV') %>%
  dplyr::arrange(COVID_vaccination_Group) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("AnnV"), "C.AnV")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("ß2GPI"), "B.ß2GPI")) %>%
  dplyr::mutate(Target = str_replace_all(Target, fixed("PT"), "A.PT")) %>%
ggboxplot(x = "COVID_vaccination_Group", y = "aPL_signal",
  color = "COVID_vaccination_Group", palette = "jco", outlier.shape = NA, add = "mean",
  merge = TRUE
) +
  geom_point(shape=16, position=position_jitterdodge(jitter.width = 0.1), aes(color = COVID_vaccination_Group)) +
  geom_hline(yintercept=50, linetype="dashed", color = "red", size=1) +
  stat_summary(fun = mean, geom = "point", shape = 18, size = 3,
               aes(group = COVID_vaccination_Group), color = "darkred",
               position = position_dodge(width = 0.8)) +
  stat_summary(fun = mean, colour = "red", 
               position = position_dodge(width = 0.8),
               geom = "text", vjust = -0.7, 
               aes(label = round(..y.., digits = 1), group = COVID_vaccination_Group)) +
  guides(x =  guide_axis(angle = 90)) +
  facet_wrap(~interaction(Target, Isotype),ncol=3) +
  theme(axis.text.y = element_text(size = 8)) +
  labs(y="aPL level",x="Inf, vacc, infl",title="Infected versus vaccianted versus influenza PT, B2GPI, and AnV")
## Warning: Removed 42 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 42 rows containing non-finite values (`stat_summary()`).
## Removed 42 rows containing non-finite values (`stat_summary()`).
## Removed 42 rows containing non-finite values (`stat_summary()`).
## Warning: Removed 42 rows containing missing values (`geom_point()`).

8 THROMBOTIC EVENTS

Are thrombotic events linked to the occurrence of aPL? We have not, previously, linked the finding to a clinical phenotype. Thrombotic events have been described in the frame of and have been associated with infection with SARS-CoV-2.

  • There is evidence that the overreacting immune system, as a response to infection, elicits autoantibodies, including antiphospholipid antibodies.
  • The link between antiphospholipid antibodies and thromboses is well established.
  • Others have focused more on cytokines and a state of hyperinflammation, which may predispose to thromboses.
  • Interfering with complement and the coagulation cascade may result in higher chances of thromboses.

Here, we may have the chance to identify such an association. Mostly, we aim to identify what features - clinical, demographic, or molecular - predict the occurrence of thrombotic events.

However, it shall be noted that one particular model is unlikely to fully reflect the relationships among the data. We will therefore employ multiple models and report on how they compare with each other, to gain more confidence in the data relation we might discover.

One more important disclaimer: For this analysis, we will omit the patients for which we have NA values (stemming from the cytokine dataset). There is a trade-off between including more/all patients/samples and including more/all features.

8.1 We prepare the data for various regression models

data_LR <- data_aPL_regr %>% 
  dplyr::select(-Unique_sample_ID_for_study,
                -aPL_IgG_rowmean,
                -aPL_IgA_rowmean,
                -aPL_IgM_rowmean
                ) %>%
  na.omit() #We omit the patients with NA values (in the cytokines) - we do it here to avoid problems further downstream. There is a tradeoff between excluding patients/samples and including parameters... If we excluded the cytokines from these analyses, we could include all patients/samples.

#data_LR$Thrombosis_group = as.factor(data_LR$Thrombosis_group) # If we want to use a binomial model
#data_LR$SeverityCoV2_or_Flu = as.factor(data_LR$SeverityCoV2_or_Flu) #If we wanted to use this as categorical variable
data_LR$Sex = as.factor(data_LR$Sex)

# We scale the numeric data
#data_LR %>% 
#  dplyr::mutate_at(c(2:39,41,47:49), scale)

data_LR_collin <- data_LR %>%  # Removal of some highly collinear features
                dplyr::select(Thrombosis_group,
                CL_IgG:PT_IgA, 
                PC1_IgG, PC1_IgA, NC_IgG, NC_IgA, Spike_IgG, Spike_IgA, # we omit RBD as they are highly collinear with Spike
                Sex, Age,
                SeverityCoV2_or_Flu_int, Acute_SARS_CoV_2_infection,
                Anticoagulation.chronic, # we omit at event
                Platelet.aggregation.inhibitor.at.event,#we omit chronic
                Immunosuppressed_admission, Vaccination_statusonly_CoV2)

data_LR_aPL <- data_LR %>%  # only aPL with age and sex
                dplyr::select(Thrombosis_group,
                CL_IgG:PT_IgA, 
                Sex, Age)

data_LR_CoV2 <- data_LR %>%  # only SARS-COV-2 antibodies with age and sex
                dplyr::select(Thrombosis_group,
                NC_IgG, NC_IgA, Spike_IgG, Spike_IgA, RBD_IgG, RBD_IgA,
                Sex, Age)

data_LR_CK <- data_LR %>%  # only SARS-COV-2 antibodies with age and sex
                dplyr::select(Thrombosis_group,
                GCSF:Inflammatory_index,
                Sex, Age)

sapply(data_LR, class)
##                        Thrombosis_group                                  CL_IgG 
##                               "numeric"                               "numeric" 
##                                  PA_IgG                                  PC_IgG 
##                               "numeric"                               "numeric" 
##                                  PE_IgG                                  PG_IgG 
##                               "numeric"                               "numeric" 
##                                  PI_IgG                                  PS_IgG 
##                               "numeric"                               "numeric" 
##                                AnnV_IgG                               ß2GPI_IgG 
##                               "numeric"                               "numeric" 
##                                  PT_IgG                                  CL_IgM 
##                               "numeric"                               "numeric" 
##                                  PA_IgM                                  PC_IgM 
##                               "numeric"                               "numeric" 
##                                  PE_IgM                                  PG_IgM 
##                               "numeric"                               "numeric" 
##                                  PI_IgM                                  PS_IgM 
##                               "numeric"                               "numeric" 
##                                AnnV_IgM                               ß2GPI_IgM 
##                               "numeric"                               "numeric" 
##                                  PT_IgM                                  CL_IgA 
##                               "numeric"                               "numeric" 
##                                  PA_IgA                                  PC_IgA 
##                               "numeric"                               "numeric" 
##                                  PE_IgA                                  PG_IgA 
##                               "numeric"                               "numeric" 
##                                  PI_IgA                                  PS_IgA 
##                               "numeric"                               "numeric" 
##                                AnnV_IgA                               ß2GPI_IgA 
##                               "numeric"                               "numeric" 
##                                  PT_IgA                               Spike_IgG 
##                               "numeric"                               "numeric" 
##                                 RBD_IgG                                  NC_IgG 
##                               "numeric"                               "numeric" 
##                               Spike_IgA                                 RBD_IgA 
##                               "numeric"                               "numeric" 
##                                  NC_IgA                                 PC1_IgG 
##                               "numeric"                               "numeric" 
##                                 PC1_IgA                                    GCSF 
##                               "numeric"                               "numeric" 
##                                   GMCSF                                IFNalpha 
##                               "numeric"                               "numeric" 
##                                IFNgamma                                 IL1beta 
##                               "numeric"                               "numeric" 
##                                     IL4                                     IL6 
##                               "numeric"                               "numeric" 
##                                     IL8                                    IL10 
##                               "numeric"                               "numeric" 
##                                   IL17A                                    IP10 
##                               "numeric"                               "numeric" 
##                               MIP1alpha                                MIP1beta 
##                               "numeric"                               "numeric" 
##                               S100A8_A9                               SDF1alpha 
##                               "numeric"                               "numeric" 
##                                TNFalpha                      Inflammatory_index 
##                               "numeric"                               "numeric" 
##                                     Sex                                     Age 
##                                "factor"                               "numeric" 
##              Acute_SARS_CoV_2_infection                Anticoagulation.at.event 
##                               "logical"                               "logical" 
##                 Anticoagulation.chronic Platelet.aggregation.inhibitor.at.event 
##                               "logical"                               "logical" 
##  Platelet.aggregation.inhibitor.chronic              Immunosuppressed_admission 
##                               "logical"                               "numeric" 
##             Vaccination_statusonly_CoV2                 SeverityCoV2_or_Flu_int 
##                               "numeric"                               "numeric"

Notes:

  • DPO is not included as a feature as there are NA values (for individuals who had never had the disease).
  • Boolean: Acute_SARS_CoV_2 infection, Anticoagulation.at.event, Anticoagulation.chronic, Platelet.aggregation.inhibitor.at.event, Platelet.aggregation.inhibitor.chronic,
  • Factors: Sex
  • Numeric: all other entries
  • Outcome: numeric (Bernoulli, 0 and 1); we could factorise them into ‘No’ and ‘Yes’ to run the binomial model with logit link function. But the binomial model works just well with 0 and 1, too, if it does at all.

Our output variable is confined to either 0 (‘No’) or 1 (‘Yes’) and as such, is representing a Bernoulli distribution, i.e. has binary outcome. The families used in the glm approaches are nicely explained here:

In our case, for feature selection, the employment of a conventional glm with logit link function will most likely result in lots of statistical fluctuations, particularly because the large interdependence of the features (see comments on collinear features further below). We will test it out.

We primarily wish to employ binomial (Bernoulli) model with logit link function, i.e. multiple logistic regression. Alternatively, to make the model more robust, we might be using Gaussian family with identity link instead of a binomial with a logit link function. In this sense, we essentially employ an ordinary linear model (see e.g. here: https://stats.stackexchange.com/questions/94852/glm-gaussian-vs-glm-binomial-vs-log-link-glm-gaussian). The usage of the Gaussian family - with the outcome variable as 0s and 1s as integers - has shown to make the results comparable among the different approaches used in the next sections. However, this approach would obviously not deliver Odds Ratios as no logit transform and exponentiation could be performed. Linear and logistic regression are often interchangeable (for binary outcome variables) in terms of p-value outcome, as suggested by Gomila et al, 2021, Logistic or linear? Estimating causal effects of experimental treatments on binary outcomes using regression analysis (https://doi.org/10.1037/xge0000920).

8.2 We assess correlation and multicollinearity

set.seed(1)
dend_data <- data.matrix(data_LR[c(2:66)]) #S100A8/A9 entirely skews the scale of the data.
dend_data_transp <- t(dend_data)
dend <- dend_data_transp %>% dist(method = "euclidean") %>% hclust(method='complete') %>% as.dendrogram
ggdendrogram(dend, rotate = FALSE, size = 2)

## Make QC plots to inspect data for potentially highly correlated data
## We use a simple LOESS function to observe trends in data

# SARS-CoV-2 antibodies and disease severity

pairs.panels(data_LR[c(32:39, 66)])

# PAI, anticoagulation, acute infection

pairs.panels(data_LR[c(59:65)])

# Cytokines

pairs.panels(data_LR[c(40:56)])

data_LR_Correlogr <- data_LR %>%
                dplyr::select(-PC_IgG, -PE_IgG, -PC_IgM, -Sex)

data_LR_Correlogr_matrix <- rcorr(as.matrix(data_LR_Correlogr),type="spearman")

p.mat_all <- cor.mtest(data_LR_Correlogr)

corrplot(data_LR_Correlogr_matrix$r, method="ellipse",
         col=brewer.pal(n=10, name="PuOr"), tl.col="black", tl.srt=45, tl.cex = 0.5,
         p.mat = p.mat_all, insig = "label_sig",
         sig.level = c(0.00001, 0.0001, 0.001, 0.01), pch.cex = 0.5,
         pch.col = 'red', type='lower')

Based on these observations

  1. Spike_IgG, RBD_IgG, NC_IgG are highly correlated with PC1_IgG;
  2. Spike_IgA, RBD_IgA, and a bit less so, NC_IgA are highly correlated with PC1_IgA;
  3. PC1_IgG and PC1_IgA are significantly correlated and probably collinear;
  4. The only feature non-significantly correlated with disease severity in the present plot may be NC_IgA;
  5. Acute infection, anticoagulation, and PAI could be used but we would restrict to anticoagulation chronic and PAI at event, dropping the event/chronic condition as they are mostly correlated and probably collinear.

We aim to challenge some of these observations using a Choleski decomposition on the correlation matrix, which includes most of these features. Choleski decomposition is a decomposition of a Hermitian positive-definite matrix into an upper (here) or lower (not used here) triangular matrix (i.e. for upper, only 0 below the diagonal) and a Hermitian transpose.

Some articles discussing aspects of Choleski decomposition:

# We conduct a Choleski decomposition of the correlation matrix

data_LR_Choleski <- data_LR %>%
                dplyr::select(-PC_IgG, -PE_IgG, -PC_IgM, -Sex)

corelation_matrix_Choleski <- rcorr(as.matrix(data_LR_Choleski),type="spearman")
corelation_matrix_Choleski_check <- as.data.frame(corelation_matrix_Choleski$r)
Choleski <- chol(as.matrix(corelation_matrix_Choleski$r))
corrplot(Choleski, method='ellipse', tl.col='black', tl.srt=90, tl.cex=0.5, type='upper')

Note: We might want to combine the corrplot and the choleski decomposition plot; together, they build a whole.

Based on these observations 1. Anticoagulation chronic but especially PAI chronic have values that are closer to 0 but not extremely so; 2. Few of the aPL have values that are close to 0 but none of them extremely so; 3. The anti-SARS-CoV-2 antibodies have decreased scores and show limited collinearity; 4. PC1_IgG and PC1_IgA are clearly collinear, in case that the remaining anti-SARS-CoV-2 antibodies IgG and IgA are used; 5. Disease severity is partially collinear with other features.

There is a fair amount of correlation in the data but limited collinearity.

Overall, we shall therefore use in a dataset subset we call data_LR_collin:

  • Disease severity
  • NC_IgA, NC_IgG, Spike_IgA, Spike_IgG
  • Vaccination status
  • Acute infection (instead of chronic but the opposite would be equally plausible)
  • PAI at event
  • Anticoagulation chronic (instead of at event but the opposite would be equally plausible)
  • The cytokines do not appear as strongly collinear and do not have to be separately adjusted

8.3 Feature selection in all data, clinical, demographic, molecular - some thoughts before we start

Before we start, one important confounder to keep in mind is the expected/hidden collinearity within the predictor variables. We have seen them earlier already in the QC plots. Some of this collinearity is just there or in itself a finding, other collinearity is desired, e.g. PC1_IgG and PC1_IgA are expected to be collinear with Spike, RBD, NC. Spike and RBD are clearly also expected to show in the same direction, including NC but only for those with infection and not those with vaccination. This means:

  • Prediction can be biased due to collinearity.
  • Some predictions may not work with a nice precision because of these confounding factors.
  • However, we are aware of it and can employ measures.

A somewhat nice overview on the problems caused by collinearity are summarised here:

We therefore use multiple approaches for feature selection and will ultimately validate our feature choice. We will employ both frequentist-based tests suitable for testing hypotheses using GLM to perform the likelihood ratio test as well as Bayesian approaches to obtain posterior distributions by fitting a GLM using multiple priors. Additionally, we use machine learning-based random forest regression.

Of note:

  1. We have generated a dataset where we remove some of the parameters that have shown to be highly collinear (data_LR_collin). These parameters are shown in the code.
  2. We use the molecular data (i.e. aPL, SARS-CoV-2 antibodies, or cytokines) alone in some analyses.

8.4 Multivariate adaptive LOGISTIC regression spline (MARS)

We employ multivariate adaptive regression splines for feature selection as they are well suited to capture nonlinear relationships. More thoughts on the methodology can be found e.g. here: https://uc-r.github.io/mars.

Multivariate adaptive regression splines (MARS) provide a modular approach to capture nonlinearity by assessing knots, in a way similar to step functions but more powerfully, by avoiding higher polynomial functions. Data points for each predictor are evaluated as a knot and a linear regression model is fitted from knot to knot whereby nonlinearity can be taken care of.

set.seed(1)
MARS_model <- earth(Thrombosis_group ~ ., data=data_LR, 
                    nfold=10, ncross=100, varmod.method='earth', glm=list(family=binomial,
                                                                          trace=0)) # build model with 10x cross-validation (nfold)
plot(MARS_model)

print(MARS_model)
## GLM (family binomial, link logit):
##  nulldev  df       dev  df   devratio     AIC iters converged
##  135.802 111   39.2501 100      0.711   63.25     7         1
## 
## Earth selected 12 of 21 terms, and 8 of 65 predictors
## Termination condition: RSq changed by less than 0.001 at 21 terms
## Importance: SeverityCoV2_or_Flu_int, RBD_IgA, PC1_IgA, PC1_IgG, GCSF, ...
## Number of terms at each degree of interaction: 1 11 (additive model)
## Earth GCV 0.1080519  RSS 7.641778  GRSq 0.4893335  RSq 0.6716996  CVRSq -0.3071991
MARS_ev <- evimp(MARS_model) # estimate variable importance
print(MARS_ev)
##                         nsubsets   gcv    rss
## SeverityCoV2_or_Flu_int       11 100.0  100.0
## RBD_IgA                       10  69.2   76.2
## PC1_IgA                        9  65.0   71.0
## PC1_IgG                        8  43.4   56.1
## GCSF                           7  32.9   48.4
## Inflammatory_index             6  29.2   43.8
## IL10                           5  24.3   38.8
## IFNalpha                       3  12.4   27.6
plot(MARS_ev)

Observations and conclusions:

  • Disease severity is considered the top-most predictor of thromboses.
  • Antibodies against SARS-CoV-2, in particular RBD but also the compound (PCA) for IgA, are next.

8.6 Multiple logistic regression in Bayesian framework

As we have done in our original manuscript published in iScience (https://doi.org/10.1016/j.isci.2023.105928), we look at the data from a Bayesian viewpoint, using an interface to run a multiple logistic regression in STAN. As we have done before, we employ three different priors, a mostly uninformative prior (Normal(0,10)), a LASSO prior and a regularised horseshoe. We can thereby deal with results in a comparative manner. Using LASSO and regularised horseshoe shrinkage priors, we expect the regression to be more stable than the conventional multiple regression analysis performed above or the one where an uninformative prior is employed. The increased robustness allows us to employ the proper model for the data, i.e. to run a proper logistic regression with binomial family and logit link function. The choice of priors is nicely explained here:

The code was originally designed by Julien Riou, who is a coauthor in the study published here (https://doi.org/10.1016/j.isci.2023.105928). The code is available on Zenodo and will have to be referenced as well (https://doi.org/10.5281/zenodo.7454292).

See also here (https://avehtari.github.io/modelselection/regularizedhorseshoe_slides.pdf) for further inspiration and insights into the method.

For the Bayesian analysis, the data requires a bit of preprocessing as constant variables are not accepted in the analysis. We have to, therefore, exclude:

  • PC_IgG
  • PC_IgM
data_LR_Bayes <- data_LR %>% 
  dplyr::select(-PC_IgG, -PC_IgM
                )

8.6.1 Normal(0,10) uninformative prior

m1_lin = stan_glm(Thrombosis_group~.,
                  family=binomial(link = "logit"),
                  prior=normal(0,10),
                  prior_intercept = normal(0,10),
                  data=data_LR_Bayes)
## 
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 0.001 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 10 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 289.546 seconds (Warm-up)
## Chain 1:                227.233 seconds (Sampling)
## Chain 1:                516.779 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 0 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 319.493 seconds (Warm-up)
## Chain 2:                217.217 seconds (Sampling)
## Chain 2:                536.71 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 0 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 294.187 seconds (Warm-up)
## Chain 3:                202.191 seconds (Sampling)
## Chain 3:                496.378 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 0 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 282.651 seconds (Warm-up)
## Chain 4:                202.355 seconds (Sampling)
## Chain 4:                485.006 seconds (Total)
## Chain 4:
summary(m1_lin)
## 
## Model Info:
##  function:     stan_glm
##  family:       binomial [logit]
##  formula:      Thrombosis_group ~ .
##  algorithm:    sampling
##  sample:       4000 (posterior sample size)
##  priors:       see help('prior_summary')
##  observations: 112
##  predictors:   64
## 
## Estimates:
##                                               mean   sd     10%    50%    90% 
## (Intercept)                                  -62.2   87.0 -172.4  -61.4   48.2
## CL_IgG                                         0.0    0.8   -1.0    0.0    1.1
## PA_IgG                                        -3.0    1.9   -5.5   -2.9   -0.6
## PE_IgG                                         1.4    8.9  -10.2    1.3   12.8
## PG_IgG                                         1.7    8.1   -8.8    2.0   12.2
## PI_IgG                                        -3.9    3.5   -8.8   -3.2   -0.1
## PS_IgG                                         1.3    1.7   -0.9    1.3    3.3
## AnnV_IgG                                      -0.9    1.7   -3.2   -0.8    1.3
## ß2GPI_IgG                                      0.8    0.7   -0.1    0.8    1.7
## PT_IgG                                        -7.1    2.8  -10.7   -6.8   -3.8
## CL_IgM                                         0.6    1.2   -0.9    0.5    2.1
## PA_IgM                                        -0.6    0.5   -1.3   -0.6    0.0
## PE_IgM                                        -0.1    5.3   -6.9    0.4    6.0
## PG_IgM                                        -0.2    4.2   -5.5   -0.3    5.1
## PI_IgM                                        -4.3    2.6   -7.9   -3.8   -1.5
## PS_IgM                                         2.0    1.3    0.5    2.0    3.7
## AnnV_IgM                                      -0.5    0.4   -1.0   -0.5    0.0
## ß2GPI_IgM                                     -1.9    0.6   -2.6   -1.8   -1.2
## PT_IgM                                         0.7    0.5    0.0    0.7    1.4
## CL_IgA                                         2.1    1.4    0.3    2.0    4.0
## PA_IgA                                        -1.6    2.0   -4.1   -1.5    0.9
## PC_IgA                                         2.0    3.5   -2.5    2.1    6.3
## PE_IgA                                        -1.3    3.5   -5.8   -1.3    3.1
## PG_IgA                                        -1.8    1.6   -3.9   -1.7    0.2
## PI_IgA                                         1.0    2.5   -2.3    1.0    4.2
## PS_IgA                                        -0.6    2.1   -3.2   -0.5    2.1
## AnnV_IgA                                       1.9    0.9    0.8    1.9    3.1
## ß2GPI_IgA                                      0.0    1.0   -1.3    0.0    1.2
## PT_IgA                                        -5.1    2.4   -8.2   -5.0   -2.2
## Spike_IgG                                     12.7    7.7    2.8   12.6   22.5
## RBD_IgG                                      -16.7    7.3  -26.3  -16.7   -7.4
## NC_IgG                                         3.5    7.6   -6.2    3.5   13.2
## Spike_IgA                                     13.6    7.5    3.7   13.5   23.2
## RBD_IgA                                      -24.5    7.1  -33.6  -24.3  -15.5
## NC_IgA                                         9.5    6.5    1.3    9.4   17.7
## PC1_IgG                                       -0.3    7.7   -9.7   -0.4    9.4
## PC1_IgA                                       -5.5    7.9  -15.7   -5.4    4.6
## GCSF                                           0.3    0.3    0.0    0.4    0.7
## GMCSF                                         -0.5    1.0   -1.8   -0.5    0.8
## IFNalpha                                       2.3    6.7   -6.3    2.4   10.9
## IFNgamma                                      -5.3    6.9  -14.3   -5.1    3.0
## IL1beta                                       -1.9    4.8   -8.0   -1.9    4.1
## IL4                                           15.4    3.4   11.0   15.2   20.0
## IL6                                            0.1    0.1    0.0    0.1    0.2
## IL8                                            0.2    0.2   -0.1    0.2    0.5
## IL10                                          -0.1    0.4   -0.6   -0.1    0.3
## IL17A                                        -14.3    6.0  -21.9  -14.1   -6.6
## IP10                                          -0.4    0.1   -0.5   -0.4   -0.2
## MIP1alpha                                     -5.9    2.8   -9.4   -5.8   -2.4
## MIP1beta                                       1.1    0.3    0.7    1.1    1.5
## S100A8_A9                                      0.0    0.0    0.0    0.0    0.0
## SDF1alpha                                      0.0    0.0   -0.1    0.0    0.0
## TNFalpha                                       7.7    5.6    0.4    7.8   14.8
## Inflammatory_index                            -2.0    8.3  -12.7   -1.9    8.5
## SexM                                          -7.9    8.0  -18.2   -7.9    2.6
## Age                                           18.9    7.2    9.7   18.9   28.3
## Acute_SARS_CoV_2_infectionTRUE                19.0    8.9    7.5   19.0   30.2
## Anticoagulation.at.eventTRUE                  -0.6    8.5  -11.4   -0.5   10.2
## Anticoagulation.chronicTRUE                    1.7    8.2   -8.7    1.7   12.1
## Platelet.aggregation.inhibitor.at.eventTRUE    5.9    8.0   -4.4    6.0   16.0
## Platelet.aggregation.inhibitor.chronicTRUE   -19.3    8.4  -30.0  -19.4   -8.5
## Immunosuppressed_admission                    -4.4    6.1  -12.1   -4.4    3.5
## Vaccination_statusonly_CoV2                   -1.3    6.4   -9.3   -1.2    6.8
## SeverityCoV2_or_Flu_int                       21.3    5.4   14.6   21.3   28.3
## 
## Fit Diagnostics:
##            mean   sd   10%   50%   90%
## mean_PPD 0.3    0.0  0.3   0.3   0.3  
## 
## The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
## 
## MCMC diagnostics
##                                             mcse Rhat n_eff
## (Intercept)                                 1.7  1.0  2763 
## CL_IgG                                      0.0  1.0  2831 
## PA_IgG                                      0.0  1.0  2477 
## PE_IgG                                      0.1  1.0  5285 
## PG_IgG                                      0.1  1.0  4432 
## PI_IgG                                      0.1  1.0  3052 
## PS_IgG                                      0.0  1.0  2827 
## AnnV_IgG                                    0.0  1.0  3181 
## ß2GPI_IgG                                   0.0  1.0  2603 
## PT_IgG                                      0.1  1.0  2533 
## CL_IgM                                      0.0  1.0  2460 
## PA_IgM                                      0.0  1.0  2603 
## PE_IgM                                      0.1  1.0  3320 
## PG_IgM                                      0.1  1.0  2630 
## PI_IgM                                      0.1  1.0  2406 
## PS_IgM                                      0.0  1.0  2445 
## AnnV_IgM                                    0.0  1.0  2418 
## ß2GPI_IgM                                   0.0  1.0  1954 
## PT_IgM                                      0.0  1.0  2613 
## CL_IgA                                      0.0  1.0  2673 
## PA_IgA                                      0.0  1.0  3022 
## PC_IgA                                      0.1  1.0  2999 
## PE_IgA                                      0.1  1.0  2962 
## PG_IgA                                      0.0  1.0  2869 
## PI_IgA                                      0.0  1.0  3077 
## PS_IgA                                      0.0  1.0  2642 
## AnnV_IgA                                    0.0  1.0  2856 
## ß2GPI_IgA                                   0.0  1.0  2687 
## PT_IgA                                      0.0  1.0  3366 
## Spike_IgG                                   0.1  1.0  4618 
## RBD_IgG                                     0.1  1.0  3978 
## NC_IgG                                      0.1  1.0  4560 
## Spike_IgA                                   0.1  1.0  4563 
## RBD_IgA                                     0.1  1.0  3758 
## NC_IgA                                      0.1  1.0  2975 
## PC1_IgG                                     0.1  1.0  5223 
## PC1_IgA                                     0.1  1.0  5119 
## GCSF                                        0.0  1.0  2624 
## GMCSF                                       0.0  1.0  2535 
## IFNalpha                                    0.1  1.0  3373 
## IFNgamma                                    0.1  1.0  3722 
## IL1beta                                     0.1  1.0  3077 
## IL4                                         0.1  1.0  1772 
## IL6                                         0.0  1.0  2644 
## IL8                                         0.0  1.0  2678 
## IL10                                        0.0  1.0  2329 
## IL17A                                       0.1  1.0  3177 
## IP10                                        0.0  1.0  2212 
## MIP1alpha                                   0.1  1.0  2645 
## MIP1beta                                    0.0  1.0  1913 
## S100A8_A9                                   0.0  1.0  2720 
## SDF1alpha                                   0.0  1.0  2492 
## TNFalpha                                    0.1  1.0  2978 
## Inflammatory_index                          0.2  1.0  2564 
## SexM                                        0.1  1.0  5004 
## Age                                         0.1  1.0  3851 
## Acute_SARS_CoV_2_infectionTRUE              0.1  1.0  5197 
## Anticoagulation.at.eventTRUE                0.1  1.0  5602 
## Anticoagulation.chronicTRUE                 0.1  1.0  5420 
## Platelet.aggregation.inhibitor.at.eventTRUE 0.1  1.0  5018 
## Platelet.aggregation.inhibitor.chronicTRUE  0.1  1.0  5787 
## Immunosuppressed_admission                  0.1  1.0  4089 
## Vaccination_statusonly_CoV2                 0.1  1.0  3952 
## SeverityCoV2_or_Flu_int                     0.1  1.0  3169 
## mean_PPD                                    0.0  1.0  3936 
## log-posterior                               0.1  1.0  1572 
## 
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
s1_lin = summary(m1_lin,probs = c(0.025,.5,0.975)) %>%
  as.data.frame() %>%
  rownames_to_column() %>% 
  as_tibble() %>%
  filter(!(rowname %in% c("log-posterior","mean_PPD","(Intercept)","sigma"))) %>%
  mutate(rowname=gsub("TRUE","",rowname),
         OR=exp(`50%`), #the backticks, or grave accents, are used for non-syntactic names/combinations
         OR_low=exp(`2.5%`),
         OR_high=exp(`97.5%`),
         type="Uninformative prior") %>%
  arrange(-abs(`50%`))

8.6.2 LASSO

m1_lasso = stan_glm(Thrombosis_group~.,
                    family=binomial(link = "logit"),
                    prior=lasso(df=1),
                    data=data_LR_Bayes)
## 
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 0 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 26.038 seconds (Warm-up)
## Chain 1:                28.333 seconds (Sampling)
## Chain 1:                54.371 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 0 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 44.739 seconds (Warm-up)
## Chain 2:                7.205 seconds (Sampling)
## Chain 2:                51.944 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 0 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 23.112 seconds (Warm-up)
## Chain 3:                28.493 seconds (Sampling)
## Chain 3:                51.605 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 0 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 25.823 seconds (Warm-up)
## Chain 4:                6.194 seconds (Sampling)
## Chain 4:                32.017 seconds (Total)
## Chain 4:
summary(m1_lasso)
## 
## Model Info:
##  function:     stan_glm
##  family:       binomial [logit]
##  formula:      Thrombosis_group ~ .
##  algorithm:    sampling
##  sample:       4000 (posterior sample size)
##  priors:       see help('prior_summary')
##  observations: 112
##  predictors:   64
## 
## Estimates:
##                                               mean   sd   10%   50%   90%
## (Intercept)                                 -0.7    0.5 -1.3  -0.7   0.0 
## CL_IgG                                       0.0    0.0  0.0   0.0   0.0 
## PA_IgG                                       0.0    0.0  0.0   0.0   0.0 
## PE_IgG                                       0.0    0.0  0.0   0.0   0.0 
## PG_IgG                                       0.0    0.0  0.0   0.0   0.0 
## PI_IgG                                       0.0    0.0  0.0   0.0   0.0 
## PS_IgG                                       0.0    0.0  0.0   0.0   0.0 
## AnnV_IgG                                     0.0    0.0  0.0   0.0   0.0 
## ß2GPI_IgG                                    0.0    0.0  0.0   0.0   0.0 
## PT_IgG                                       0.0    0.0  0.0   0.0   0.0 
## CL_IgM                                       0.0    0.0  0.0   0.0   0.0 
## PA_IgM                                       0.0    0.0  0.0   0.0   0.0 
## PE_IgM                                       0.0    0.0  0.0   0.0   0.0 
## PG_IgM                                       0.0    0.0  0.0   0.0   0.0 
## PI_IgM                                       0.0    0.0  0.0   0.0   0.0 
## PS_IgM                                       0.0    0.0  0.0   0.0   0.0 
## AnnV_IgM                                     0.0    0.0  0.0   0.0   0.0 
## ß2GPI_IgM                                    0.0    0.0  0.0   0.0   0.0 
## PT_IgM                                       0.0    0.0  0.0   0.0   0.0 
## CL_IgA                                       0.0    0.0  0.0   0.0   0.0 
## PA_IgA                                       0.0    0.0  0.0   0.0   0.0 
## PC_IgA                                       0.0    0.0  0.0   0.0   0.0 
## PE_IgA                                       0.0    0.0  0.0   0.0   0.0 
## PG_IgA                                       0.0    0.0  0.0   0.0   0.0 
## PI_IgA                                       0.0    0.0  0.0   0.0   0.0 
## PS_IgA                                       0.0    0.0  0.0   0.0   0.0 
## AnnV_IgA                                     0.0    0.0  0.0   0.0   0.0 
## ß2GPI_IgA                                    0.0    0.0  0.0   0.0   0.0 
## PT_IgA                                       0.0    0.0  0.0   0.0   0.0 
## Spike_IgG                                    0.0    0.0  0.0   0.0   0.0 
## RBD_IgG                                      0.0    0.0  0.0   0.0   0.0 
## NC_IgG                                       0.0    0.0  0.0   0.0   0.0 
## Spike_IgA                                    0.0    0.0  0.0   0.0   0.0 
## RBD_IgA                                      0.0    0.0  0.0   0.0   0.0 
## NC_IgA                                       0.0    0.0  0.0   0.0   0.0 
## PC1_IgG                                      0.0    0.0  0.0   0.0   0.0 
## PC1_IgA                                      0.0    0.0  0.0   0.0   0.0 
## GCSF                                         0.0    0.0  0.0   0.0   0.0 
## GMCSF                                        0.0    0.0  0.0   0.0   0.0 
## IFNalpha                                     0.0    0.0  0.0   0.0   0.0 
## IFNgamma                                     0.0    0.0  0.0   0.0   0.0 
## IL1beta                                      0.0    0.0  0.0   0.0   0.0 
## IL4                                          0.0    0.0  0.0   0.0   0.0 
## IL6                                          0.0    0.0  0.0   0.0   0.0 
## IL8                                          0.0    0.0  0.0   0.0   0.0 
## IL10                                         0.0    0.0  0.0   0.0   0.0 
## IL17A                                        0.0    0.0  0.0   0.0   0.0 
## IP10                                         0.0    0.0  0.0   0.0   0.0 
## MIP1alpha                                    0.0    0.0  0.0   0.0   0.0 
## MIP1beta                                     0.0    0.0  0.0   0.0   0.0 
## S100A8_A9                                    0.0    0.0  0.0   0.0   0.0 
## SDF1alpha                                    0.0    0.0  0.0   0.0   0.0 
## TNFalpha                                     0.0    0.0  0.0   0.0   0.0 
## Inflammatory_index                           0.0    0.0  0.0   0.0   0.0 
## SexM                                         0.0    0.0  0.0   0.0   0.0 
## Age                                          0.0    0.0  0.0   0.0   0.0 
## Acute_SARS_CoV_2_infectionTRUE               0.0    0.0  0.0   0.0   0.0 
## Anticoagulation.at.eventTRUE                 0.0    0.0  0.0   0.0   0.0 
## Anticoagulation.chronicTRUE                  0.0    0.0  0.0   0.0   0.0 
## Platelet.aggregation.inhibitor.at.eventTRUE  0.0    0.0  0.0   0.0   0.0 
## Platelet.aggregation.inhibitor.chronicTRUE   0.0    0.0  0.0   0.0   0.0 
## Immunosuppressed_admission                   0.0    0.0  0.0   0.0   0.0 
## Vaccination_statusonly_CoV2                  0.0    0.0  0.0   0.0   0.0 
## SeverityCoV2_or_Flu_int                      0.0    0.0  0.0   0.0   0.0 
## 
## Fit Diagnostics:
##            mean   sd   10%   50%   90%
## mean_PPD 0.3    0.1  0.2   0.3   0.4  
## 
## The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
## 
## MCMC diagnostics
##                                             mcse Rhat n_eff
## (Intercept)                                 0.0  1.0  1384 
## CL_IgG                                      0.0  1.0  4128 
## PA_IgG                                      0.0  1.0  3332 
## PE_IgG                                      0.0  1.0  3945 
## PG_IgG                                      0.0  1.0  3341 
## PI_IgG                                      0.0  1.0  3489 
## PS_IgG                                      0.0  1.0  4037 
## AnnV_IgG                                    0.0  1.0  3116 
## ß2GPI_IgG                                   0.0  1.0  2259 
## PT_IgG                                      0.0  1.0  3341 
## CL_IgM                                      0.0  1.0  2469 
## PA_IgM                                      0.0  1.0  3687 
## PE_IgM                                      0.0  1.0  3179 
## PG_IgM                                      0.0  1.0  3169 
## PI_IgM                                      0.0  1.0  2373 
## PS_IgM                                      0.0  1.0  2802 
## AnnV_IgM                                    0.0  1.0  2600 
## ß2GPI_IgM                                   0.0  1.0  3246 
## PT_IgM                                      0.0  1.0   840 
## CL_IgA                                      0.0  1.0  3079 
## PA_IgA                                      0.0  1.0  2888 
## PC_IgA                                      0.0  1.0  2083 
## PE_IgA                                      0.0  1.0  2903 
## PG_IgA                                      0.0  1.0  2425 
## PI_IgA                                      0.0  1.0  3757 
## PS_IgA                                      0.0  1.0  3882 
## AnnV_IgA                                    0.0  1.0  3026 
## ß2GPI_IgA                                   0.0  1.0  3631 
## PT_IgA                                      0.0  1.0  2605 
## Spike_IgG                                   0.0  1.0  2772 
## RBD_IgG                                     0.0  1.0  3464 
## NC_IgG                                      0.0  1.0  3086 
## Spike_IgA                                   0.0  1.0  4120 
## RBD_IgA                                     0.0  1.0  3451 
## NC_IgA                                      0.0  1.0  3494 
## PC1_IgG                                     0.0  1.0  4129 
## PC1_IgA                                     0.0  1.0  2778 
## GCSF                                        0.0  1.0   827 
## GMCSF                                       0.0  1.0  2267 
## IFNalpha                                    0.0  1.0  2233 
## IFNgamma                                    0.0  1.0  2773 
## IL1beta                                     0.0  1.0  3134 
## IL4                                         0.0  1.0  2081 
## IL6                                         0.0  1.0   270 
## IL8                                         0.0  1.0   679 
## IL10                                        0.0  1.0  2085 
## IL17A                                       0.0  1.0  2236 
## IP10                                        0.0  1.0  1213 
## MIP1alpha                                   0.0  1.0  2837 
## MIP1beta                                    0.0  1.0  2642 
## S100A8_A9                                   0.0  1.0   325 
## SDF1alpha                                   0.0  1.0   242 
## TNFalpha                                    0.0  1.0  3017 
## Inflammatory_index                          0.0  1.0  3393 
## SexM                                        0.0  1.0  3080 
## Age                                         0.0  1.0  3252 
## Acute_SARS_CoV_2_infectionTRUE              0.0  1.0  4008 
## Anticoagulation.at.eventTRUE                0.0  1.0  3429 
## Anticoagulation.chronicTRUE                 0.0  1.0  3837 
## Platelet.aggregation.inhibitor.at.eventTRUE 0.0  1.0  3030 
## Platelet.aggregation.inhibitor.chronicTRUE  0.0  1.0  3271 
## Immunosuppressed_admission                  0.0  1.0  3196 
## Vaccination_statusonly_CoV2                 0.0  1.0  2945 
## SeverityCoV2_or_Flu_int                     0.0  1.0  3266 
## mean_PPD                                    0.0  1.0  4259 
## log-posterior                               0.5  1.0   427 
## 
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
s1_lasso = summary(m1_lasso,probs = c(0.025,.5,0.975)) %>%
  as.data.frame() %>%
  rownames_to_column() %>% 
  as_tibble() %>%
  filter(!(rowname %in% c("log-posterior","mean_PPD","(Intercept)","sigma"))) %>%
  mutate(rowname=gsub("TRUE","",rowname),
         OR=exp(`50%`),
         OR_low=exp(`2.5%`),
         OR_high=exp(`97.5%`),
         type="LASSO") %>%
  arrange(-abs(`50%`))

8.6.3 Regularised horseshoe (Piironen and Vehtari (2017), https://doi.org/10.1214/17-EJS1337SI)

The regulairsed horseshoe prior is a generalisation of the horseshoe prior that allows us to specify a minimum level of regularization to the largest values (Piironen and Vehtari (2017), https://doi.org/10.1214/17-EJS1337SI)

Here, they recommend to set the globalscale argument equal to the ratio of the expected number of non-zero coefficients to the expected number of zero coefficients, divided by the square root of the number of observations.

  • Example for small dataset (18/110)/sqrt(128)
  • Example for large dataset (35/1500)/sqrt(1535)
  • What we have used for SARS-CoV-2 (https://doi.org/10.1101/2020.05.31.20118554, https://doi.org/10.5281/zenodo.7454292), (19/570)/sqrt(589)
  • globalscale = p0 / (D - p0) * 1 / sqrt(N) with
    • p0 = prior guess for the number of relevant (i.e., non-zero) regression coefficients/‘variables’,
    • N = Number of observations,
    • D = Number of regression coefficients or Number of ‘variables’,

The utilisation of this shrinkage prior shrinks the beta coefficients towards zero while the local parameters allow some of the beta coefficient, i.e. features, to escape the shrinkage (https://avehtari.github.io/modelselection/regularizedhorseshoe_slides.pdf). This is why this is a valid approach to identify relevant parameters.

globalscale = (18/110)/sqrt(128)
                                
m1_hs = stan_glm(Thrombosis_group~.,
                 family=binomial(link = "logit"),
                 prior=hs(global_scale=globalscale),
                 data=data_LR_Bayes)
## 
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 1).
## Chain 1: 
## Chain 1: Gradient evaluation took 0 seconds
## Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 1: Adjust your expectations accordingly!
## Chain 1: 
## Chain 1: 
## Chain 1: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 1: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 1: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 1: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 1: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 1: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 1: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 1: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 1: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 1: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 1: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 1: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 1: 
## Chain 1:  Elapsed Time: 46.199 seconds (Warm-up)
## Chain 1:                38.106 seconds (Sampling)
## Chain 1:                84.305 seconds (Total)
## Chain 1: 
## 
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 2).
## Chain 2: 
## Chain 2: Gradient evaluation took 0 seconds
## Chain 2: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 2: Adjust your expectations accordingly!
## Chain 2: 
## Chain 2: 
## Chain 2: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 2: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 2: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 2: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 2: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 2: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 2: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 2: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 2: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 2: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 2: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 2: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 2: 
## Chain 2:  Elapsed Time: 91.809 seconds (Warm-up)
## Chain 2:                41.796 seconds (Sampling)
## Chain 2:                133.605 seconds (Total)
## Chain 2: 
## 
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 3).
## Chain 3: 
## Chain 3: Gradient evaluation took 0 seconds
## Chain 3: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 3: Adjust your expectations accordingly!
## Chain 3: 
## Chain 3: 
## Chain 3: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 3: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 3: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 3: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 3: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 3: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 3: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 3: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 3: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 3: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 3: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 3: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 3: 
## Chain 3:  Elapsed Time: 52.735 seconds (Warm-up)
## Chain 3:                37.365 seconds (Sampling)
## Chain 3:                90.1 seconds (Total)
## Chain 3: 
## 
## SAMPLING FOR MODEL 'bernoulli' NOW (CHAIN 4).
## Chain 4: 
## Chain 4: Gradient evaluation took 0 seconds
## Chain 4: 1000 transitions using 10 leapfrog steps per transition would take 0 seconds.
## Chain 4: Adjust your expectations accordingly!
## Chain 4: 
## Chain 4: 
## Chain 4: Iteration:    1 / 2000 [  0%]  (Warmup)
## Chain 4: Iteration:  200 / 2000 [ 10%]  (Warmup)
## Chain 4: Iteration:  400 / 2000 [ 20%]  (Warmup)
## Chain 4: Iteration:  600 / 2000 [ 30%]  (Warmup)
## Chain 4: Iteration:  800 / 2000 [ 40%]  (Warmup)
## Chain 4: Iteration: 1000 / 2000 [ 50%]  (Warmup)
## Chain 4: Iteration: 1001 / 2000 [ 50%]  (Sampling)
## Chain 4: Iteration: 1200 / 2000 [ 60%]  (Sampling)
## Chain 4: Iteration: 1400 / 2000 [ 70%]  (Sampling)
## Chain 4: Iteration: 1600 / 2000 [ 80%]  (Sampling)
## Chain 4: Iteration: 1800 / 2000 [ 90%]  (Sampling)
## Chain 4: Iteration: 2000 / 2000 [100%]  (Sampling)
## Chain 4: 
## Chain 4:  Elapsed Time: 39.345 seconds (Warm-up)
## Chain 4:                37.228 seconds (Sampling)
## Chain 4:                76.573 seconds (Total)
## Chain 4:
summary(m1_hs)
## 
## Model Info:
##  function:     stan_glm
##  family:       binomial [logit]
##  formula:      Thrombosis_group ~ .
##  algorithm:    sampling
##  sample:       4000 (posterior sample size)
##  priors:       see help('prior_summary')
##  observations: 112
##  predictors:   64
## 
## Estimates:
##                                               mean   sd   10%   50%   90%
## (Intercept)                                 -2.8    1.2 -4.4  -2.7  -1.4 
## CL_IgG                                       0.0    0.0  0.0   0.0   0.0 
## PA_IgG                                       0.0    0.0  0.0   0.0   0.0 
## PE_IgG                                       0.0    0.2  0.0   0.0   0.0 
## PG_IgG                                       0.0    0.1  0.0   0.0   0.0 
## PI_IgG                                       0.0    0.1  0.0   0.0   0.0 
## PS_IgG                                       0.0    0.0  0.0   0.0   0.0 
## AnnV_IgG                                     0.0    0.0  0.0   0.0   0.0 
## ß2GPI_IgG                                    0.0    0.0  0.0   0.0   0.0 
## PT_IgG                                       0.0    0.0  0.0   0.0   0.0 
## CL_IgM                                       0.0    0.0  0.0   0.0   0.0 
## PA_IgM                                       0.0    0.0  0.0   0.0   0.0 
## PE_IgM                                       0.0    0.0  0.0   0.0   0.0 
## PG_IgM                                       0.0    0.0  0.0   0.0   0.0 
## PI_IgM                                       0.0    0.0  0.0   0.0   0.0 
## PS_IgM                                       0.0    0.0  0.0   0.0   0.0 
## AnnV_IgM                                     0.0    0.0  0.0   0.0   0.0 
## ß2GPI_IgM                                    0.0    0.0  0.0   0.0   0.0 
## PT_IgM                                       0.0    0.0  0.0   0.0   0.0 
## CL_IgA                                       0.0    0.0  0.0   0.0   0.0 
## PA_IgA                                       0.0    0.0  0.0   0.0   0.0 
## PC_IgA                                       0.0    0.0  0.0   0.0   0.0 
## PE_IgA                                       0.0    0.0  0.0   0.0   0.0 
## PG_IgA                                       0.0    0.0  0.0   0.0   0.0 
## PI_IgA                                       0.0    0.0  0.0   0.0   0.0 
## PS_IgA                                       0.0    0.0  0.0   0.0   0.0 
## AnnV_IgA                                     0.0    0.0  0.0   0.0   0.0 
## ß2GPI_IgA                                    0.0    0.0  0.0   0.0   0.0 
## PT_IgA                                       0.0    0.0  0.0   0.0   0.0 
## Spike_IgG                                    0.0    0.0  0.0   0.0   0.0 
## RBD_IgG                                      0.0    0.0  0.0   0.0   0.0 
## NC_IgG                                       0.0    0.0  0.0   0.0   0.0 
## Spike_IgA                                    0.0    0.1  0.0   0.0   0.0 
## RBD_IgA                                      0.0    0.0  0.0   0.0   0.0 
## NC_IgA                                       0.0    0.0  0.0   0.0   0.0 
## PC1_IgG                                      0.0    0.0  0.0   0.0   0.0 
## PC1_IgA                                      0.0    0.0  0.0   0.0   0.0 
## GCSF                                         0.0    0.0  0.0   0.0   0.0 
## GMCSF                                        0.0    0.0  0.0   0.0   0.0 
## IFNalpha                                     0.0    0.0  0.0   0.0   0.0 
## IFNgamma                                     0.0    0.1  0.0   0.0   0.0 
## IL1beta                                      0.0    0.0  0.0   0.0   0.0 
## IL4                                          0.0    0.0  0.0   0.0   0.0 
## IL6                                          0.0    0.0  0.0   0.0   0.0 
## IL8                                          0.0    0.0  0.0   0.0   0.0 
## IL10                                         0.0    0.0  0.0   0.0   0.0 
## IL17A                                        0.0    0.0  0.0   0.0   0.0 
## IP10                                         0.0    0.0  0.0   0.0   0.0 
## MIP1alpha                                    0.0    0.0  0.0   0.0   0.0 
## MIP1beta                                     0.0    0.0  0.0   0.0   0.0 
## S100A8_A9                                    0.0    0.0  0.0   0.0   0.0 
## SDF1alpha                                    0.0    0.0  0.0   0.0   0.0 
## TNFalpha                                     0.0    0.0  0.0   0.0   0.0 
## Inflammatory_index                           0.0    0.0  0.0   0.0   0.0 
## SexM                                         0.0    0.1  0.0   0.0   0.0 
## Age                                          0.0    0.0  0.0   0.0   0.0 
## Acute_SARS_CoV_2_infectionTRUE               0.2    0.8  0.0   0.0   0.0 
## Anticoagulation.at.eventTRUE                 0.0    0.0  0.0   0.0   0.0 
## Anticoagulation.chronicTRUE                  0.0    0.0  0.0   0.0   0.0 
## Platelet.aggregation.inhibitor.at.eventTRUE  0.0    0.0  0.0   0.0   0.0 
## Platelet.aggregation.inhibitor.chronicTRUE   0.0    0.1  0.0   0.0   0.0 
## Immunosuppressed_admission                   0.0    0.1  0.0   0.0   0.0 
## Vaccination_statusonly_CoV2                  0.0    0.0  0.0   0.0   0.0 
## SeverityCoV2_or_Flu_int                      1.1    0.4  0.7   1.1   1.5 
## 
## Fit Diagnostics:
##            mean   sd   10%   50%   90%
## mean_PPD 0.3    0.0  0.2   0.3   0.4  
## 
## The mean_ppd is the sample average posterior predictive distribution of the outcome variable (for details see help('summary.stanreg')).
## 
## MCMC diagnostics
##                                             mcse Rhat n_eff
## (Intercept)                                 0.0  1.0  3760 
## CL_IgG                                      0.0  1.0  3639 
## PA_IgG                                      0.0  1.0  3048 
## PE_IgG                                      0.0  1.0  1375 
## PG_IgG                                      0.0  1.0  3162 
## PI_IgG                                      0.0  1.0  1484 
## PS_IgG                                      0.0  1.0  4707 
## AnnV_IgG                                    0.0  1.0  2907 
## ß2GPI_IgG                                   0.0  1.0  2932 
## PT_IgG                                      0.0  1.0  3377 
## CL_IgM                                      0.0  1.0  2958 
## PA_IgM                                      0.0  1.0  4149 
## PE_IgM                                      0.0  1.0  3036 
## PG_IgM                                      0.0  1.0  3730 
## PI_IgM                                      0.0  1.0  2276 
## PS_IgM                                      0.0  1.0  2943 
## AnnV_IgM                                    0.0  1.0  1959 
## ß2GPI_IgM                                   0.0  1.0  2093 
## PT_IgM                                      0.0  1.0  2706 
## CL_IgA                                      0.0  1.0  3350 
## PA_IgA                                      0.0  1.0  3732 
## PC_IgA                                      0.0  1.0  2508 
## PE_IgA                                      0.0  1.0  3340 
## PG_IgA                                      0.0  1.0  3875 
## PI_IgA                                      0.0  1.0  3059 
## PS_IgA                                      0.0  1.0  3818 
## AnnV_IgA                                    0.0  1.0  4260 
## ß2GPI_IgA                                   0.0  1.0  4173 
## PT_IgA                                      0.0  1.0  4111 
## Spike_IgG                                   0.0  1.0  3406 
## RBD_IgG                                     0.0  1.0  2806 
## NC_IgG                                      0.0  1.0  3356 
## Spike_IgA                                   0.0  1.0  2424 
## RBD_IgA                                     0.0  1.0  2698 
## NC_IgA                                      0.0  1.0  2876 
## PC1_IgG                                     0.0  1.0  3107 
## PC1_IgA                                     0.0  1.0  3673 
## GCSF                                        0.0  1.0  2039 
## GMCSF                                       0.0  1.0  3928 
## IFNalpha                                    0.0  1.0  3756 
## IFNgamma                                    0.0  1.0  3073 
## IL1beta                                     0.0  1.0  3926 
## IL4                                         0.0  1.0  3560 
## IL6                                         0.0  1.0  2887 
## IL8                                         0.0  1.0  3758 
## IL10                                        0.0  1.0  3515 
## IL17A                                       0.0  1.0  3059 
## IP10                                        0.0  1.0  4713 
## MIP1alpha                                   0.0  1.0  3625 
## MIP1beta                                    0.0  1.0  4227 
## S100A8_A9                                   0.0  1.0  1943 
## SDF1alpha                                   0.0  1.0  2618 
## TNFalpha                                    0.0  1.0  4195 
## Inflammatory_index                          0.0  1.0  2891 
## SexM                                        0.0  1.0  2323 
## Age                                         0.0  1.0  3891 
## Acute_SARS_CoV_2_infectionTRUE              0.1  1.1    57 
## Anticoagulation.at.eventTRUE                0.0  1.0  3471 
## Anticoagulation.chronicTRUE                 0.0  1.0  3503 
## Platelet.aggregation.inhibitor.at.eventTRUE 0.0  1.0  3327 
## Platelet.aggregation.inhibitor.chronicTRUE  0.0  1.0  2760 
## Immunosuppressed_admission                  0.0  1.0  1917 
## Vaccination_statusonly_CoV2                 0.0  1.0  2325 
## SeverityCoV2_or_Flu_int                     0.0  1.0   119 
## mean_PPD                                    0.0  1.0  3657 
## log-posterior                               0.4  1.0  1017 
## 
## For each parameter, mcse is Monte Carlo standard error, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence Rhat=1).
s1_hs = summary(m1_hs,probs = c(0.025,.5,0.975)) %>%
  as.data.frame() %>%
  rownames_to_column() %>% 
  as_tibble() %>%
  filter(!(rowname %in% c("log-posterior","mean_PPD","(Intercept)","sigma"))) %>%
  mutate(rowname=gsub("TRUE","",rowname),
         OR=exp(`50%`),
         OR_low=exp(`2.5%`),
         OR_high=exp(`97.5%`),
         type="Regularized horseshoe") %>%
  arrange(-abs(`50%`))

8.6.4 Bring them together

post_icd10 = bind_rows(s1_lasso,s1_hs,s1_lin) %>%
  mutate(rowname=factor(rowname,levels=s1_hs$rowname[order(s1_hs$`50%`)]),
         type=factor(type,levels=c("Uninformative prior","LASSO","Regularized horseshoe")))
saveRDS(post_icd10,file="post_icd10.rds")

ranks = filter(post_icd10,type=="Regularized horseshoe") %>%
  arrange(-`50%`) %>%
  mutate(rank=row_number()) %>%
  mutate(ICD10=rowname) %>%
  dplyr::select(ICD10,rank)

tibble_post_icd10 <- post_icd10 %>%
  mutate(OR=paste0(sprintf("%.2f",OR)," (",sprintf("%.2f",OR_low),"-",sprintf("%.2f",OR_high),")")) %>%
  dplyr::select(ICD10=rowname,OR,type) %>%
  pivot_wider(names_from=type,values_from=OR) %>%
  left_join(ranks) %>%
  arrange(rank) %>% 
  filter(rank %in% c(1:10,(max(rank)-9):max(rank))) %>%
  dplyr::select(rank,ICD10,`Uninformative prior`,LASSO,`Regularized horseshoe`)
write.csv(tibble_post_icd10,"tibble_post_icd10_ranktable.csv")
print(tibble_post_icd10)
## # A tibble: 20 × 5
##     rank ICD10                   `Uninformative prior`             LASSO Regul…¹
##    <int> <fct>                   <chr>                             <chr> <chr>  
##  1     1 SeverityCoV2_or_Flu_int 1733400303.17 (50701.53-98894827… 1.00… 2.94 (…
##  2     2 IL6                     1.14 (0.99-1.36)                  1.00… 1.01 (…
##  3     3 GCSF                    1.44 (0.81-2.23)                  1.00… 1.00 (…
##  4     4 IL8                     1.16 (0.78-2.09)                  1.00… 1.00 (…
##  5     5 CL_IgM                  1.73 (0.20-19.48)                 1.00… 1.00 (…
##  6     6 ß2GPI_IgG               2.19 (0.58-9.49)                  1.00… 1.00 (…
##  7     7 PT_IgM                  1.95 (0.75-5.87)                  1.00… 1.00 (…
##  8     8 PS_IgM                  7.50 (0.70-94.18)                 1.00… 1.00 (…
##  9     9 CL_IgA                  7.63 (0.59-162.30)                1.00… 1.00 (…
## 10    10 PI_IgA                  2.62 (0.02-375.26)                1.00… 1.00 (…
## 11    54 PG_IgA                  0.18 (0.01-3.52)                  1.00… 1.00 (…
## 12    55 CL_IgG                  1.03 (0.20-5.13)                  1.00… 1.00 (…
## 13    56 IL10                    0.92 (0.36-1.65)                  1.00… 1.00 (…
## 14    57 AnnV_IgG                0.46 (0.01-9.12)                  1.00… 1.00 (…
## 15    58 IP10                    0.70 (0.53-0.88)                  1.00… 1.00 (…
## 16    59 PI_IgG                  0.04 (0.00-2.83)                  1.00… 1.00 (…
## 17    60 PI_IgM                  0.02 (0.00-0.61)                  1.00… 1.00 (…
## 18    61 SDF1alpha               0.97 (0.93-1.00)                  1.00… 1.00 (…
## 19    62 ß2GPI_IgM               0.16 (0.05-0.43)                  1.00… 1.00 (…
## 20    63 AnnV_IgM                0.61 (0.29-1.34)                  1.00… 0.99 (…
## # … with abbreviated variable name ¹​`Regularized horseshoe`
post_icd10 %>%
  mutate(type=factor(type,labels=c("Unregularized","LASSO","Regularized Horseshoe"))) %>%
  ggplot() +
  geom_pointrange(aes(x=rowname,y=OR,ymin=OR_low,ymax=OR_high,colour=type)) +
  geom_hline(yintercept=1,linetype=2) +
  scale_colour_discrete(guide='none') +
  scale_y_log10() +
  coord_flip() +
  facet_wrap(~type,ncol=3, scales = "free_x") +
  theme(axis.text.y = element_text(size = 5)) +
  labs(y="Odds ratio (95% credible interval)",x="Predictors",title="Multiple logistic regression in Bayesian framework")

write.csv(post_icd10,"post_icd10_ranktable.csv")

Observations and conclusions:

  • In this Bayesian multiple logistic regression, the usage of the uninformative prior has mounded in the expected statistical fluctuation with large credible intervals.
  • We note that the approach was generally more robust than the conventional GLM; the outcome of the uninformative prior resembles what we had observed with the conventional gaussian GLM.
  • LASSO and regularised horseshoe prior distributions properly deal with this fluctuation.
    • Disease severity stands out as the only positive association, markedly with regularised horseshoe.
    • No noteworthy negative association.

8.7 Boruta

Essentially, we aim to run a random forest regression and to select only those parameters whose importance is above a certain threshold.

  1. Rather than relying on or competing with other features, Boruta creates shadows of every single feature by randomly shuffling them; the shadow features (step 1).
  2. Following the generation of the shadow features, which is essentially a duplication of the dataset by shuffling all values in each column, a random forest is fitted on the them using the outcome variable (step 2).
  3. At each iteration, the algorithm assesses whether the original feature has a higher importance than the highest of its shadow features. As a measure of importance, the Mean Decrease Accuracy is used, with higher scores being more important. The z-score represents the number of standard deviations from the mean data point. Unimportant features are dismissed.
  4. This is done iteratively, i.e. the whole process is being repeated multiple times (in our case: 1000 times) so that a distribution of importance is attributed to each feature.
  5. The algorithm stops either at a predefined maximum iteration or when all features are confirmed or rejected.
  6. Subsequently, the importance of each of the original features is visualised. If the original feature importance is higher than the threshold, the feature is ‘confirmed important’. If there is insecurity, it may be ‘tentative’. Else, it is ‘confirmed unimportant’.

More details can be found here:

We expect Boruta not to be confounded by interdependencies of variables. However, once we build our final model in a later step, we may have to account for collinear features again.

set.seed(1)
boruta_output <- Boruta(Thrombosis_group ~ ., data=na.omit(data_LR), doTrace=2, maxRuns=1000)  # perform Boruta search
boruta_signif <- names(boruta_output$finalDecision[boruta_output$finalDecision %in% c("Confirmed", "Tentative")])  # collect Confirmed and Tentative variables
print(boruta_signif)  # significant variables
##  [1] "ß2GPI_IgM"                   "NC_IgG"                     
##  [3] "Spike_IgA"                   "NC_IgA"                     
##  [5] "PC1_IgA"                     "GCSF"                       
##  [7] "IL4"                         "IL6"                        
##  [9] "IL8"                         "IL10"                       
## [11] "IP10"                        "SDF1alpha"                  
## [13] "Acute_SARS_CoV_2_infection"  "Vaccination_statusonly_CoV2"
## [15] "SeverityCoV2_or_Flu_int"
getNonRejectedFormula(boruta_output)
## Thrombosis_group ~ ß2GPI_IgM + NC_IgG + Spike_IgA + NC_IgA + 
##     PC1_IgA + GCSF + IL4 + IL6 + IL8 + IL10 + IP10 + SDF1alpha + 
##     Acute_SARS_CoV_2_infection + Vaccination_statusonly_CoV2 + 
##     SeverityCoV2_or_Flu_int
## <environment: 0x000001f0da1d4980>
plotImpHistory(boruta_output)

plot(boruta_output, cex.axis=.4, las=2, xlab="", main="Variable Importance")  # plot variable importance

Observations and conclusions:

  • Boruta confirms many of the results we have observed before.
    • Severity is the most important predictor.
    • NC_IgA and Spike_IgA in general seem important.
    • The anti-SARS-CoV-2 IgGs are important, too.
  • There is no competition among features here as collinearity is not an issue.
  • The model suggests that B2GPI_IgM is an important feature.
  • The model suggests that acute infection with SARS-CoV-2 is an important feature.

8.8 Use the most important features to build model

8.8.1 Conclusions from the approaches above and model creation

Let us list some of the consensus information here:

  1. Severity is the most important predictor.
  2. All anti-SARS-CoV-2 measurements, including their linear combinations, contain important information. It is unclear which of them is more essential than another one as they are largely, but not entirely, overlapping, i.e. collinear, except for NC IgA. All of them shall be tried.
  3. B2GPI IgM may be somehow associated and it can be tested in a model.
  4. Among the cytokines, GCSF, Inflammatory index, IFN alpha, IL4, IL6, IL8, IL10, SDF1 alpha have found to carry some importance.

A good way to go about this is to:

  1. Test and include those parameters that have shown most promising in above models.
  2. Account for collinearity. What cannot be accounted for during preprocessing will be looked into during model development, following a step-wise approach.
    • We go from simple to more complex.
    • When adding a parameter, we check whether this improves the model’s AIC. If not, we drop the parameter and add another one.
    • We also consider the residual deviance.
    • We aim to end up with the simplest model possible, i.e. we avoid redundant parameters.
  3. We employ a generalised linear model (GLM). If needed, we might use a generalised linear mixed model, e.g. LME4 https://www.rdocumentation.org/packages/lme4/versions/1.1-31), as appropriate. For some insights into mixed-effects models and their application, see https://stats.stackexchange.com/questions/275450/when-to-use-mixed-effect-model.
set.seed(1)

model1 <- glm(Thrombosis_group ~ SeverityCoV2_or_Flu_int, 
                  data = data_LR, 
                  family = binomial(link = 'logit'))
# AIC: 109.1, residual deviance 105.1, df=111

model2 <- glm(Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgG, 
                  data = data_LR, 
                  family = binomial(link = 'logit'))
# AIC: 111.1, residual deviance 98.89, df=111

model3 <- glm(Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA, 
                  data = data_LR, 
                  family = binomial(link = 'logit'))
# AIC: 106.1, residual deviance 100.1, df=111

model4 <- glm(Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA + PC1_IgG, 
                  data = data_LR, 
                  family = binomial(link = 'logit'))
# AIC: 104.2, residual deviance 96.24, df=111

model5 <- glm(Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA + PC1_IgG + PC1_IgA, 
                  data = data_LR, 
                  family = binomial(link = 'logit'))
# AIC: 105.9, residual deviance 95.91, df=111

model6 <- glm(Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA + PC1_IgG + Spike_IgG, 
                  data = data_LR, 
                  family = binomial(link = 'logit'))
# AIC: 106.1, residual deviance 96.09, df=111

model7 <- glm(Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA + PC1_IgG + Spike_IgA, 
                  data = data_LR, 
                  family = binomial(link = 'logit'))
# AIC: 104.6, residual deviance 94.08, df=111

model8 <- glm(Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA + PC1_IgG + Spike_IgA + RBD_IgG, 
                  data = data_LR, 
                  family = binomial(link = 'logit'))
# AIC: 101.6, residual deviance 89.62, df=111

model9 <- glm(Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA + PC1_IgG + Spike_IgA + RBD_IgG + RBD_IgA, 
                  data = data_LR, 
                  family = binomial(link = 'logit'))
# AIC: 97.12, residual deviance 83.12, df=111

model10 <- glm(Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA + PC1_IgG + Spike_IgA + RBD_IgG + RBD_IgA +
               ß2GPI_IgM, 
                  data = data_LR, 
                  family = binomial(link = 'logit'))
# AIC: 97.38, residual deviance 81.38, df=111

model11 <- glm(Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA + PC1_IgG + Spike_IgA + RBD_IgG + RBD_IgA +
               ß2GPI_IgM + GCSF, 
                  data = data_LR, 
                  family = binomial(link = 'logit'))
# AIC: 91.41, residual deviance 73.41, df=111

model12 <- glm(Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA + PC1_IgG + Spike_IgA + RBD_IgG + RBD_IgA +
               ß2GPI_IgM + GCSF + Inflammatory_index, 
                  data = data_LR, 
                  family = binomial(link = 'logit'))
# AIC: 90.29, residual deviance 70.29, df=111

model13 <- glm(Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA + PC1_IgG + Spike_IgA + RBD_IgG + RBD_IgA +
               ß2GPI_IgM + GCSF + Inflammatory_index + IFNalpha, 
                  data = data_LR, 
                  family = binomial(link = 'logit'))
# AIC: 91.58, residual deviance 69.58, df=111

model14 <- glm(Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA + PC1_IgG + Spike_IgA + RBD_IgG + RBD_IgA +
               ß2GPI_IgM + GCSF + Inflammatory_index + IL4, 
                  data = data_LR, 
                  family = binomial(link = 'logit'))
# AIC: 89.91, residual deviance 67.91, df=111

model15 <- glm(Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA + PC1_IgG + Spike_IgA + RBD_IgG + RBD_IgA +
               ß2GPI_IgM + GCSF + Inflammatory_index + IL4 + IL6, 
                  data = data_LR, 
                  family = binomial(link = 'logit'))
# AIC: 87.93, residual deviance 63.93, df=111

model16 <- glm(Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA + PC1_IgG + Spike_IgA + RBD_IgG + RBD_IgA +
               ß2GPI_IgM + GCSF + Inflammatory_index + IL4 + IL6 + IL8, 
                  data = data_LR, 
                  family = binomial(link = 'logit'))
# AIC: 89.88, residual deviance 63.88, df=111

model17 <- glm(Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA + PC1_IgG + Spike_IgA + RBD_IgG + RBD_IgA +
               ß2GPI_IgM + GCSF + Inflammatory_index + IL4 + IL6 + IL10, 
                  data = data_LR, 
                  family = binomial(link = 'logit'))
# AIC: 89.92, residual deviance 63.92, df=111

model18 <- glm(Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA + PC1_IgG + Spike_IgA + RBD_IgG + RBD_IgA +
               ß2GPI_IgM + GCSF + Inflammatory_index + IL4 + IL6 + SDF1alpha, 
                  data = data_LR, 
                  family = binomial(link = 'logit'))
# AIC: 89.55, residual deviance 63.55, df=111

8.8.2 We make predictions using the best model

best_model <- glm(Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA + PC1_IgG + Spike_IgA + RBD_IgG + RBD_IgA +
               ß2GPI_IgM + GCSF + Inflammatory_index + IL4 + IL6 + SDF1alpha, 
                  data = data_LR, 
                  family = binomial(link = 'logit'))
summary(best_model)
## 
## Call:
## glm(formula = Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA + 
##     PC1_IgG + Spike_IgA + RBD_IgG + RBD_IgA + ß2GPI_IgM + GCSF + 
##     Inflammatory_index + IL4 + IL6 + SDF1alpha, family = binomial(link = "logit"), 
##     data = data_LR)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8117  -0.4675  -0.1206   0.2050   3.4532  
## 
## Coefficients:
##                           Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             -2.8783616  3.3010843  -0.872 0.383239    
## SeverityCoV2_or_Flu_int  1.3050814  0.3640231   3.585 0.000337 ***
## NC_IgA                   0.5448058  0.3664767   1.487 0.137119    
## PC1_IgG                  0.4359815  0.6714943   0.649 0.516164    
## Spike_IgA                1.5387442  0.6778684   2.270 0.023209 *  
## RBD_IgG                 -1.4537991  1.0366304  -1.402 0.160788    
## RBD_IgA                 -0.8463592  0.5076818  -1.667 0.095493 .  
## ß2GPI_IgM               -0.0354969  0.0194990  -1.820 0.068690 .  
## GCSF                     0.0291701  0.0103654   2.814 0.004890 ** 
## Inflammatory_index      -0.1198584  0.1522527  -0.787 0.431145    
## IL4                      0.0822651  0.1557373   0.528 0.597340    
## IL6                      0.0066867  0.0042048   1.590 0.111777    
## SDF1alpha               -0.0005713  0.0009320  -0.613 0.539925    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 135.801  on 111  degrees of freedom
## Residual deviance:  63.553  on  99  degrees of freedom
## AIC: 89.553
## 
## Number of Fisher Scoring iterations: 7
best_model_AIC <- stepAIC(best_model) #We want to have a model that is as simple as possible.
## Start:  AIC=89.55
## Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA + PC1_IgG + 
##     Spike_IgA + RBD_IgG + RBD_IgA + ß2GPI_IgM + GCSF + Inflammatory_index + 
##     IL4 + IL6 + SDF1alpha
## 
##                           Df Deviance     AIC
## - SDF1alpha                1   63.931  87.931
## - PC1_IgG                  1   63.982  87.982
## - IL4                      1   63.992  87.992
## - Inflammatory_index       1   64.276  88.276
## <none>                         63.553  89.553
## - RBD_IgG                  1   65.806  89.806
## - NC_IgA                   1   65.978  89.978
## - IL6                      1   66.510  90.510
## - RBD_IgA                  1   67.206  91.206
## - ß2GPI_IgM                1   67.273  91.273
## - GCSF                     1   70.073  94.073
## - Spike_IgA                1   70.917  94.917
## - SeverityCoV2_or_Flu_int  1   83.303 107.303
## 
## Step:  AIC=87.93
## Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA + PC1_IgG + 
##     Spike_IgA + RBD_IgG + RBD_IgA + ß2GPI_IgM + GCSF + Inflammatory_index + 
##     IL4 + IL6
## 
##                           Df Deviance     AIC
## - PC1_IgG                  1   64.445  86.445
## - IL4                      1   65.281  87.281
## <none>                         63.931  87.931
## - RBD_IgG                  1   66.573  88.573
## - ß2GPI_IgM                1   67.294  89.294
## - NC_IgA                   1   67.688  89.688
## - IL6                      1   67.914  89.914
## - RBD_IgA                  1   68.349  90.349
## - Inflammatory_index       1   70.324  92.324
## - GCSF                     1   71.070  93.070
## - Spike_IgA                1   73.215  95.215
## - SeverityCoV2_or_Flu_int  1   83.395 105.395
## 
## Step:  AIC=86.44
## Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA + Spike_IgA + 
##     RBD_IgG + RBD_IgA + ß2GPI_IgM + GCSF + Inflammatory_index + 
##     IL4 + IL6
## 
##                           Df Deviance     AIC
## - IL4                      1   65.641  85.641
## <none>                         64.445  86.445
## - ß2GPI_IgM                1   67.881  87.881
## - RBD_IgA                  1   68.457  88.457
## - IL6                      1   68.595  88.595
## - NC_IgA                   1   68.928  88.928
## - RBD_IgG                  1   69.278  89.278
## - Inflammatory_index       1   71.207  91.207
## - GCSF                     1   72.455  92.455
## - Spike_IgA                1   74.477  94.477
## - SeverityCoV2_or_Flu_int  1   94.945 114.945
## 
## Step:  AIC=85.64
## Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA + Spike_IgA + 
##     RBD_IgG + RBD_IgA + ß2GPI_IgM + GCSF + Inflammatory_index + 
##     IL6
## 
##                           Df Deviance     AIC
## <none>                         65.641  85.641
## - RBD_IgA                  1   68.920  86.920
## - ß2GPI_IgM                1   68.931  86.931
## - NC_IgA                   1   69.341  87.341
## - IL6                      1   70.765  88.765
## - RBD_IgG                  1   70.817  88.817
## - Inflammatory_index       1   71.277  89.277
## - GCSF                     1   73.820  91.820
## - Spike_IgA                1   75.499  93.499
## - SeverityCoV2_or_Flu_int  1   99.494 117.494
summary(best_model_AIC)
## 
## Call:
## glm(formula = Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA + 
##     Spike_IgA + RBD_IgG + RBD_IgA + ß2GPI_IgM + GCSF + Inflammatory_index + 
##     IL6, family = binomial(link = "logit"), data = data_LR)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7195  -0.4547  -0.1309   0.2950   3.2503  
## 
## Coefficients:
##                          Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             -5.501299   1.346553  -4.085 4.40e-05 ***
## SeverityCoV2_or_Flu_int  1.355051   0.327588   4.136 3.53e-05 ***
## NC_IgA                   0.566391   0.307276   1.843  0.06529 .  
## Spike_IgA                1.619212   0.645750   2.507  0.01216 *  
## RBD_IgG                 -0.922462   0.457697  -2.015  0.04386 *  
## RBD_IgA                 -0.721473   0.442120  -1.632  0.10271    
## ß2GPI_IgM               -0.029540   0.016877  -1.750  0.08007 .  
## GCSF                     0.029748   0.009862   3.016  0.00256 ** 
## Inflammatory_index      -0.154214   0.072648  -2.123  0.03377 *  
## IL6                      0.008515   0.004008   2.124  0.03365 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 135.801  on 111  degrees of freedom
## Residual deviance:  65.641  on 102  degrees of freedom
## AIC: 85.641
## 
## Number of Fisher Scoring iterations: 6
minimum_model <- glm(Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA, 
                  data = data_LR, 
                  family = binomial(link = 'logit'))
summary(minimum_model)
## 
## Call:
## glm(formula = Thrombosis_group ~ SeverityCoV2_or_Flu_int + NC_IgA, 
##     family = binomial(link = "logit"), data = data_LR)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6426  -0.9126  -0.2667   0.8101   2.7666  
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -3.8050     0.7998  -4.757 1.96e-06 ***
## SeverityCoV2_or_Flu_int   0.8437     0.2058   4.099 4.14e-05 ***
## NC_IgA                    0.4442     0.2053   2.164   0.0305 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 135.80  on 111  degrees of freedom
## Residual deviance: 100.11  on 109  degrees of freedom
## AIC: 106.11
## 
## Number of Fisher Scoring iterations: 5
additional_model <- glm(Thrombosis_group ~ ß2GPI_IgM + GCSF + Inflammatory_index + IL4 + IL6 + SDF1alpha, 
                  data = data_LR, 
                  family = binomial(link = 'logit'))
summary(additional_model)
## 
## Call:
## glm(formula = Thrombosis_group ~ ß2GPI_IgM + GCSF + Inflammatory_index + 
##     IL4 + IL6 + SDF1alpha, family = binomial(link = "logit"), 
##     data = data_LR)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8902  -0.8280  -0.5771   0.8655   1.9811  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)  
## (Intercept)        -0.7094513  0.7997162  -0.887   0.3750  
## ß2GPI_IgM          -0.0093652  0.0117086  -0.800   0.4238  
## GCSF                0.0127408  0.0089582   1.422   0.1550  
## Inflammatory_index -0.0544647  0.0705607  -0.772   0.4402  
## IL4                 0.1503516  0.0783958   1.918   0.0551 .
## IL6                 0.0055507  0.0028088   1.976   0.0481 *
## SDF1alpha          -0.0005150  0.0004565  -1.128   0.2593  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 135.8  on 111  degrees of freedom
## Residual deviance: 116.6  on 105  degrees of freedom
## AIC: 130.6
## 
## Number of Fisher Scoring iterations: 5
cyto_model <- glm(Thrombosis_group ~ GCSF + Inflammatory_index + IL4 + IL6 + SDF1alpha, 
                  data = data_LR, 
                  family = binomial(link = 'logit'))
summary(cyto_model)
## 
## Call:
## glm(formula = Thrombosis_group ~ GCSF + Inflammatory_index + 
##     IL4 + IL6 + SDF1alpha, family = binomial(link = "logit"), 
##     data = data_LR)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7926  -0.8041  -0.5938   0.9100   2.0399  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)  
## (Intercept)        -0.9662867  0.7418521  -1.303   0.1927  
## GCSF                0.0128858  0.0089870   1.434   0.1516  
## Inflammatory_index -0.0526122  0.0712693  -0.738   0.4604  
## IL4                 0.1297307  0.0732641   1.771   0.0766 .
## IL6                 0.0056352  0.0027324   2.062   0.0392 *
## SDF1alpha          -0.0005094  0.0004574  -1.114   0.2654  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 135.80  on 111  degrees of freedom
## Residual deviance: 117.24  on 106  degrees of freedom
## AIC: 129.24
## 
## Number of Fisher Scoring iterations: 5
aPL_model <- glm(Thrombosis_group ~ ß2GPI_IgM, 
                  data = data_LR, 
                  family = binomial(link = 'logit'))
summary(aPL_model)
## 
## Call:
## glm(formula = Thrombosis_group ~ ß2GPI_IgM, family = binomial(link = "logit"), 
##     data = data_LR)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.8776  -0.8399  -0.8195   1.5104   1.6471  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -0.755591   0.364626  -2.072   0.0382 *
## ß2GPI_IgM   -0.003887   0.010046  -0.387   0.6989  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 135.80  on 111  degrees of freedom
## Residual deviance: 135.65  on 110  degrees of freedom
## AIC: 139.65
## 
## Number of Fisher Scoring iterations: 4
probabilities <- best_model %>% predict(data_LR, type = "response")
summary(probabilities)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0003468 0.0100178 0.1248446 0.2946429 0.5585479 0.9995669
probabilities_AIC <- best_model_AIC %>% predict(data_LR, type = "response")
summary(probabilities_AIC)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
## 0.0007961 0.0130705 0.1346276 0.2946429 0.5711042 0.9998359
probabilities_minimum <- minimum_model %>% predict(data_LR, type = "response")
summary(probabilities_minimum)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.02177 0.03663 0.36307 0.29464 0.44258 0.80316
probabilities_additional <- additional_model %>% predict(data_LR, type = "response")
summary(probabilities_additional)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0514  0.1574  0.2385  0.2946  0.3550  1.0000
probabilities_cyto <- cyto_model %>% predict(data_LR, type = "response")
summary(probabilities_cyto)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.05657 0.16849 0.22916 0.29464 0.35664 0.99981
probabilities_aPL <- aPL_model %>% predict(data_LR, type = "response")
summary(probabilities_aPL)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.2451  0.2842  0.2920  0.2946  0.3056  0.3196
contrasts(as.factor(data_LR$Thrombosis_group))
##   1
## 0 0
## 1 1
predicted.classes <- ifelse(probabilities > 0.5, "1", "0")
print(predicted.classes)
##  31  32  33  34  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "1" "0" "0" "1" "0" "0" "0" "0" "0" 
##  51  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" 
##  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90 
## "0" "0" "1" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "1" "0" "1" "1" "0" "0" "0" 
##  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 109 110 
## "0" "0" "0" "0" "0" "0" "1" "0" "0" "1" "0" "0" "0" "0" "0" "1" "1" "1" "1" "1" 
## 111 112 113 114 115 116 117 118 119 133 134 135 136 137 138 139 140 141 142 143 
## "0" "1" "1" "1" "1" "1" "1" "1" "1" "1" "1" "1" "1" "1" "1" "1" "1" "0" "1" "0" 
## 144 145 146 147 148 149 150 151 152 153 154 155 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0"
predicted.classes_AIC <- ifelse(probabilities_AIC > 0.5, "1", "0")
print(predicted.classes_AIC)
##  31  32  33  34  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "1" "0" "0" "1" "0" "0" "1" "0" "0" 
##  51  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" 
##  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90 
## "0" "0" "1" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "1" "0" "1" "1" "0" "0" "0" 
##  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 109 110 
## "0" "0" "0" "0" "0" "0" "1" "0" "0" "1" "0" "0" "0" "0" "0" "1" "1" "1" "1" "1" 
## 111 112 113 114 115 116 117 118 119 133 134 135 136 137 138 139 140 141 142 143 
## "1" "0" "1" "1" "1" "1" "1" "1" "1" "1" "1" "1" "1" "1" "1" "1" "1" "0" "1" "0" 
## 144 145 146 147 148 149 150 151 152 153 154 155 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0"
predicted.classes_minimum <- ifelse(probabilities_minimum > 0.5, "1", "0")
print(predicted.classes_minimum)
##  31  32  33  34  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50 
## "0" "0" "0" "0" "1" "0" "0" "0" "0" "0" "0" "1" "0" "0" "1" "0" "0" "0" "0" "0" 
##  51  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70 
## "0" "0" "0" "0" "1" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" 
##  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" 
##  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 109 110 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "1" "0" "0" "0" "0" "0" "0" "0" "1" "1" "0" 
## 111 112 113 114 115 116 117 118 119 133 134 135 136 137 138 139 140 141 142 143 
## "0" "0" "0" "1" "1" "1" "1" "1" "1" "1" "1" "1" "1" "1" "1" "1" "0" "0" "0" "0" 
## 144 145 146 147 148 149 150 151 152 153 154 155 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0"
predicted.classes_additional <- ifelse(probabilities_additional > 0.5, "1", "0")
print(predicted.classes_additional)
##  31  32  33  34  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "1" "0" "0" 
##  51  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" 
##  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "1" "1" "0" "0" "0" "0" 
##  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 109 110 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "1" "1" "0" "0" "0" 
## 111 112 113 114 115 116 117 118 119 133 134 135 136 137 138 139 140 141 142 143 
## "0" "0" "0" "1" "0" "1" "1" "0" "0" "0" "1" "1" "0" "1" "0" "0" "1" "0" "1" "0" 
## 144 145 146 147 148 149 150 151 152 153 154 155 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0"
predicted.classes_cyto <- ifelse(probabilities_cyto > 0.5, "1", "0")
print(predicted.classes_cyto)
##  31  32  33  34  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "1" "0" "0" 
##  51  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" 
##  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "1" "0" "0" "0" "1" "1" "0" "0" "0" "0" 
##  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 109 110 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "1" "1" "0" "0" "0" 
## 111 112 113 114 115 116 117 118 119 133 134 135 136 137 138 139 140 141 142 143 
## "0" "0" "0" "1" "0" "1" "1" "0" "0" "0" "1" "1" "0" "1" "0" "0" "1" "0" "1" "0" 
## 144 145 146 147 148 149 150 151 152 153 154 155 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0"
predicted.classes_aPL <- ifelse(probabilities_aPL > 0.5, "1", "0")
print(predicted.classes_aPL)
##  31  32  33  34  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" 
##  51  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69  70 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" 
##  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" 
##  91  92  93  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 109 110 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" 
## 111 112 113 114 115 116 117 118 119 133 134 135 136 137 138 139 140 141 142 143 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" 
## 144 145 146 147 148 149 150 151 152 153 154 155 
## "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0" "0"
mean(predicted.classes == data_LR$Thrombosis_group)
## [1] 0.9196429
mean(predicted.classes_AIC == data_LR$Thrombosis_group)
## [1] 0.9107143
mean(predicted.classes_minimum == data_LR$Thrombosis_group)
## [1] 0.8303571
mean(predicted.classes_additional == data_LR$Thrombosis_group)
## [1] 0.7857143
mean(predicted.classes_cyto == data_LR$Thrombosis_group)
## [1] 0.7767857
mean(predicted.classes_aPL == data_LR$Thrombosis_group)
## [1] 0.7053571
# We can visualise these models in six independent plots

data_LR$Thrombosis_group_predicted <- as.numeric(predicted.classes)
data_LR$Thrombosis_group_predicted_prob <- as.numeric(probabilities)

data_LR$Thrombosis_group_predicted_AIC <- as.numeric(predicted.classes_AIC)
data_LR$Thrombosis_group_predicted_prob_AIC <- as.numeric(probabilities_AIC)

data_LR$Thrombosis_group_predicted_minimum <- as.numeric(predicted.classes_minimum)
data_LR$Thrombosis_group_predicted_prob_minimum <- as.numeric(probabilities_minimum)

data_LR$Thrombosis_group_predicted_additional <- as.numeric(predicted.classes_additional)
data_LR$Thrombosis_group_predicted_prob_additional <- as.numeric(probabilities_additional)

data_LR$Thrombosis_group_predicted_cyto <- as.numeric(predicted.classes_cyto)
data_LR$Thrombosis_group_predicted_prob_cyto <- as.numeric(probabilities_cyto)

data_LR$Thrombosis_group_predicted_aPL <- as.numeric(predicted.classes_aPL)
data_LR$Thrombosis_group_predicted_prob_aPL <- as.numeric(probabilities_aPL)

ggplot(data = data_LR, aes(x = Thrombosis_group_predicted_prob, y = Thrombosis_group)) +
  geom_pointrange(aes(ymin = Thrombosis_group, ymax = Thrombosis_group)) +
  geom_smooth(method = "glm", 
              method.args = list(family = binomial()), colour='blue') +
  stat_smooth(method = "glm", 
              method.args = list(family = binomial()), colour='blue', lty='dotted', geom='ribbon', fill=NA) +
  labs(y="Observed values",x="Predicted values (best model)",title="")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data = data_LR, aes(x = Thrombosis_group_predicted_prob_AIC, y = Thrombosis_group)) +
  geom_pointrange(aes(ymin = Thrombosis_group, ymax = Thrombosis_group)) +
  geom_smooth(method = "glm", 
              method.args = list(family = binomial()), colour='green') +
  stat_smooth(method = "glm", 
              method.args = list(family = binomial()), colour='green', lty='dotted', geom='ribbon', fill=NA) +
  labs(y="Observed values",x="Predicted values (AIC-improved model)",title="")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data = data_LR, aes(x = Thrombosis_group_predicted_prob_minimum, y = Thrombosis_group)) +
  geom_pointrange(aes(ymin = Thrombosis_group, ymax = Thrombosis_group)) +
  geom_smooth(method = "glm", 
              method.args = list(family = binomial()), colour='brown') +
  stat_smooth(method = "glm", 
              method.args = list(family = binomial()), colour='brown', lty='dotted', geom='ribbon', fill=NA) +
  labs(y="Observed values",x="Predicted values (minimal model)",title="")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data = data_LR, aes(x = Thrombosis_group_predicted_prob_additional, y = Thrombosis_group)) +
  geom_pointrange(aes(ymin = Thrombosis_group, ymax = Thrombosis_group)) +
  geom_smooth(method = "glm", 
              method.args = list(family = binomial()), colour='violet') +
  stat_smooth(method = "glm", 
              method.args = list(family = binomial()), colour='violet', lty='dotted', geom='ribbon', fill=NA) +
  labs(y="Observed values",x="Predicted values (additional model)",title="")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data = data_LR, aes(x = Thrombosis_group_predicted_prob_cyto, y = Thrombosis_group)) +
  geom_pointrange(aes(ymin = Thrombosis_group, ymax = Thrombosis_group)) +
  geom_smooth(method = "glm", 
              method.args = list(family = binomial()), colour='grey') +
  stat_smooth(method = "glm", 
              method.args = list(family = binomial()), colour='grey', lty='dotted', geom='ribbon', fill=NA) +
  labs(y="Observed values",x="Predicted values (additional model)",title="")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

ggplot(data = data_LR, aes(x = Thrombosis_group_predicted_prob_aPL, y = Thrombosis_group)) +
  geom_pointrange(aes(ymin = Thrombosis_group, ymax = Thrombosis_group)) +
  geom_smooth(method = "glm", 
              method.args = list(family = binomial()), colour='black') +
  stat_smooth(method = "glm", 
              method.args = list(family = binomial()), colour='black', lty='dotted', geom='ribbon', fill=NA) +
  labs(y="Observed values",x="Predicted values (additional model)",title="")
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

# We use an alternative visualisation, more condensed
Thrombosis_group_predicted_prob <- data.frame(prob=as.numeric(probabilities)) %>%
  dplyr::mutate(Model='A. best model') %>%
  data.frame(observed=data_LR$Thrombosis_group)

Thrombosis_group_predicted_prob_AIC <- data.frame(prob=as.numeric(probabilities_AIC)) %>%
  dplyr::mutate(Model='B. AIC improved model') %>%
  data.frame(observed=data_LR$Thrombosis_group)

Thrombosis_group_predicted_prob_minimum <- data.frame(prob=as.numeric(probabilities_minimum)) %>%
  dplyr::mutate(Model='C. minimal model') %>%
  data.frame(observed=data_LR$Thrombosis_group)

Thrombosis_group_predicted_prob_additional <- data.frame(prob=as.numeric(probabilities_additional)) %>%
  dplyr::mutate(Model='D. additional model') %>%
  data.frame(observed=data_LR$Thrombosis_group)

Thrombosis_group_predicted_prob_cyto <- data.frame(prob=as.numeric(probabilities_cyto)) %>%
  dplyr::mutate(Model='E. cyto model') %>%
  data.frame(observed=data_LR$Thrombosis_group)

Thrombosis_group_predicted_prob_aPL <- data.frame(prob=as.numeric(probabilities_aPL)) %>%
  dplyr::mutate(Model='F. aPL model') %>%
  data.frame(observed=data_LR$Thrombosis_group)

Regmodel_bind = bind_rows(Thrombosis_group_predicted_prob, Thrombosis_group_predicted_prob_AIC, Thrombosis_group_predicted_prob_minimum, Thrombosis_group_predicted_prob_additional,Thrombosis_group_predicted_prob_cyto,Thrombosis_group_predicted_prob_aPL)

logregplot <-
ggplot(data = Regmodel_bind, aes(x = prob, y = observed, color=Model)) +
  geom_pointrange(aes(ymin = observed, ymax = observed)) +
  geom_smooth(method = "glm", 
              method.args = list(family = binomial())) +
  stat_smooth(method = "glm", 
              method.args = list(family = binomial()), lty='dotted', geom='ribbon', fill=NA) +
  scale_color_manual(values=c('blue','green','brown','violet','grey','black')) +
  labs(y="Observed values",x="Predicted values",title="") +
  facet_wrap(~Model,ncol=6)

8.8.3 Visualise best model in a ROC curve

# We build a ROC curve for the actual probabilities (non-binarised)
pred_object_prob <-prediction(data_LR$Thrombosis_group_predicted_prob,data_LR$Thrombosis_group)
ROC_curve_prob <-performance(pred_object_prob,"tpr","fpr")
ROC_curve_prob_GG <- data.frame(FPR=ROC_curve_prob@x.values[[1]],TPR=ROC_curve_prob@y.values[[1]]) %>%
  dplyr::mutate(Model='A. best model')
auc_best_model <- performance(pred_object_prob, measure = "auc")
auc_best_model_value <- round(auc_best_model@y.values[[1]],4)

pred_object_prob_AIC <-prediction(data_LR$Thrombosis_group_predicted_prob_AIC,data_LR$Thrombosis_group)
ROC_curve_prob_AIC <-performance(pred_object_prob_AIC,"tpr","fpr")
ROC_curve_prob_AIC_GG <- data.frame(FPR=ROC_curve_prob_AIC@x.values[[1]],TPR=ROC_curve_prob_AIC@y.values[[1]]) %>%
  dplyr::mutate(Model='B. AIC improved model')
auc_best_model_AIC <- performance(pred_object_prob_AIC, measure = "auc")
auc_best_model_AIC_value <- round(auc_best_model_AIC@y.values[[1]],4)

pred_object_prob_minimal <-prediction(data_LR$Thrombosis_group_predicted_prob_minimum,data_LR$Thrombosis_group)
ROC_curve_prob_minimal <-performance(pred_object_prob_minimal,"tpr","fpr")
ROC_curve_prob_minimal_GG <- data.frame(FPR=ROC_curve_prob_minimal@x.values[[1]],TPR=ROC_curve_prob_minimal@y.values[[1]])%>%
  dplyr::mutate(Model='C. minimal model')
auc_minimal <- performance(pred_object_prob_minimal, measure = "auc")
auc_minimal_value <- round(auc_minimal@y.values[[1]],4)

pred_object_prob_additional <-prediction(data_LR$Thrombosis_group_predicted_prob_additional,data_LR$Thrombosis_group)
ROC_curve_prob_additional <-performance(pred_object_prob_additional,"tpr","fpr")
ROC_curve_prob_additional_GG <- data.frame(FPR=ROC_curve_prob_additional@x.values[[1]],TPR=ROC_curve_prob_additional@y.values[[1]])%>%
  dplyr::mutate(Model='D. additional model')
auc_additional <- performance(pred_object_prob_additional, measure = "auc")
auc_additional_value <- round(auc_additional@y.values[[1]],4)

pred_object_prob_cyto <-prediction(data_LR$Thrombosis_group_predicted_prob_cyto,data_LR$Thrombosis_group)
ROC_curve_prob_cyto <-performance(pred_object_prob_cyto,"tpr","fpr")
ROC_curve_prob_cyto_GG <- data.frame(FPR=ROC_curve_prob_cyto@x.values[[1]],TPR=ROC_curve_prob_cyto@y.values[[1]])%>%
  dplyr::mutate(Model='E. cyto model')
auc_cyto <- performance(pred_object_prob_cyto, measure = "auc")
auc_cyto_value <- round(auc_cyto@y.values[[1]],4)

pred_object_prob_aPL <-prediction(data_LR$Thrombosis_group_predicted_prob_aPL,data_LR$Thrombosis_group)
ROC_curve_prob_aPL <-performance(pred_object_prob_aPL,"tpr","fpr")
ROC_curve_prob_aPL_GG <- data.frame(FPR=ROC_curve_prob_aPL@x.values[[1]],TPR=ROC_curve_prob_aPL@y.values[[1]])%>%
  dplyr::mutate(Model='F. aPL model')
auc_aPL <- performance(pred_object_prob_aPL, measure = "auc")
auc_aPL_value <- round(auc_aPL@y.values[[1]],4)

ROCplot = bind_rows(ROC_curve_prob_GG, ROC_curve_prob_AIC_GG, ROC_curve_prob_minimal_GG, ROC_curve_prob_additional_GG,ROC_curve_prob_cyto_GG,ROC_curve_prob_aPL_GG)

ROCplot_graph <-
ggplot() +
    geom_line(data=ROCplot,aes(x=FPR,y=TPR, color=Model), size = 0.8) +
    geom_line(aes(x=c(0,1),y=c(0,1)), color="red") +
    geom_vline(xintercept = 1) +
    geom_hline(yintercept = 0) +
    scale_color_manual(values=c('blue','green','brown','violet','grey','black')) +
    xlab("False Positive Rate (1-Specificity)") +
    ylab("True Positive Rate (Sensitivity)") +
    annotate("text", x = .75, y = .30, label = paste("AUC =", auc_best_model_value), color='blue') +
    annotate("text", x = .75, y = .25, label = paste("AUC =", auc_best_model_AIC_value), color='green') +
    annotate("text", x = .75, y = .20, label = paste("AUC =", auc_minimal_value), color='brown') +
    annotate("text", x = .75, y = .15, label = paste("AUC =", auc_additional_value), color='violet') +
    annotate("text", x = .75, y = .10, label = paste("AUC =", auc_cyto_value), color='grey') +
    annotate("text", x = .75, y = .05, label = paste("AUC =", auc_aPL_value), color='black')

ggarrange(logregplot, ROCplot_graph,
          labels = c("A", "B"),
          ncol = 1, nrow = 2)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

What are the conclusions we can derive from all this?

  1. Under the given premises, we have not been able to establish a strong connection between the occurrence of aPL and thrombotic events:
    • aPL have not popped up in any analysis, except for B2GPI_IgM in Boruta.
    • We claim that for a hypothesis to be accepted, there needs to be solid evidence. In this case, the evidence is weak and potentially nonexistent. This means that the hypothesis that the increased aPL levels observed in individuals with concurrent or subsequent to infection with SARS-CoV-2 may be causally related to a higher incidence of thrombotic events is refuted.
    • However, the model suggests that B2GPI_IgM levels are weakly modulating the risk for thrombotic events as a part of the model of choice.
  2. We have then looked more broadly to identify potential associations with other molecular and/or clinical/demographic data:
    • Unsurprisingly, we find associations with SARS-CoV-2.
    • We find the strongest association with disease severity, i.e. patients with increased acute disease burden are more likely to develop thrombotic events concomitantly or later on.
    • SARS-CoV-2 disease severity is, in part, correlated with the strength of the antibody response against SARS-CoV-2.
    • Nevertheless, some effects were exerted by the presence of anti-SARS-CoV-2 antibodies in addition to disease severity.
    • Thus, antiviral immune reaction seems to be associated.
    • Demographic data (age/sex) had no effective predictive value.
    • Some cytokine levels were found to be additional modulators of risk of thromboses, particularly G-CSF, which is known to induce thrombocytopenia. IL-6 is a documented risk factor for coagulopathies shown here (https://www.nature.com/articles/s41569-021-00665-7) and elsewhere.

The occurrence of aPL antibodies after infection with SARS-CoV-2, which is a well-described phenomenon and has shown to be partially modulated by the strength of the anti-viral immune response, and the occurrence of thrombotic events concomitant to or subsequent to infection with SARS-CoV-2, are therefore two independent events, both of which have a common cause: infection with SARS-CoV-2.

The most exciting follow-up would be to employ this model in a further cohort to validate the predictors.

9 ORIGINAL COMPUTING ENVIRONMENT

sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22635)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_India.utf8  LC_CTYPE=English_India.utf8   
## [3] LC_MONETARY=English_India.utf8 LC_NUMERIC=C                  
## [5] LC_TIME=English_India.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] rstanarm_2.21.3     Rcpp_1.0.9          Lahman_10.0-1      
##  [4] earth_5.3.1         plotmo_3.6.2        TeachingDemos_2.12 
##  [7] plotrix_3.8-2       ROCR_1.0-11         Boruta_8.0.0       
## [10] tis_1.39            MASS_7.3-58.1       Hmisc_4.7-1        
## [13] Formula_1.2-4       survival_3.4-0      lattice_0.20-45    
## [16] mgcv_1.8-41         nlme_3.1-160        viridis_0.6.2      
## [19] viridisLite_0.4.1   psych_2.2.9         ggsci_2.9          
## [22] fmsb_0.7.3          PupillometryR_0.0.4 rlang_1.0.6        
## [25] corrplot_0.92       FactoMineR_2.6      factoextra_1.0.7   
## [28] ggpubr_0.5.0        cowplot_1.1.1       ggridges_0.5.4     
## [31] dendextend_1.16.0   ggdendro_0.1.23     gplots_3.1.3       
## [34] table1_1.4.2        RColorBrewer_1.1-3  stringi_1.7.8      
## [37] broom_1.0.1         magrittr_2.0.3      forcats_0.5.2      
## [40] stringr_1.4.1       dplyr_1.0.10        purrr_0.3.5        
## [43] readr_2.1.3         tidyr_1.2.1         tibble_3.1.8       
## [46] ggplot2_3.4.0       tidyverse_1.3.2     openxlsx_4.2.5.1   
## 
## loaded via a namespace (and not attached):
##   [1] utf8_1.2.2           lme4_1.1-31          tidyselect_1.2.0    
##   [4] htmlwidgets_1.5.4    ranger_0.14.1        grid_4.2.2          
##   [7] munsell_0.5.0        codetools_0.2-18     interp_1.1-3        
##  [10] DT_0.29              miniUI_0.1.1.1       withr_2.5.0         
##  [13] colorspace_2.0-3     highr_0.9            knitr_1.41          
##  [16] rstudioapi_0.14      leaps_3.1            stats4_4.2.2        
##  [19] ggsignif_0.6.4       bayesplot_1.10.0     labeling_0.4.2      
##  [22] emmeans_1.8.2        rstan_2.26.13        mnormt_2.1.1        
##  [25] farver_2.1.1         coda_0.19-4          vctrs_0.5.0         
##  [28] generics_0.1.3       TH.data_1.1-1        xfun_0.34           
##  [31] timechange_0.1.1     R6_2.5.1             markdown_1.4        
##  [34] bitops_1.0-7         cachem_1.0.6         assertthat_0.2.1    
##  [37] promises_1.2.0.1     scales_1.2.1         multcomp_1.4-23     
##  [40] nnet_7.3-18          googlesheets4_1.0.1  gtable_0.3.1        
##  [43] multcompView_0.1-8   processx_3.8.0       sandwich_3.0-2      
##  [46] scatterplot3d_0.3-42 splines_4.2.2        rstatix_0.7.1       
##  [49] gargle_1.2.1         inline_0.3.19        checkmate_2.1.0     
##  [52] yaml_2.3.6           reshape2_1.4.4       abind_1.4-5         
##  [55] modelr_0.1.10        threejs_0.3.3        crosstalk_1.2.0     
##  [58] backports_1.4.1      httpuv_1.6.6         tools_4.2.2         
##  [61] ellipsis_0.3.2       jquerylib_0.1.4      plyr_1.8.8          
##  [64] base64enc_0.1-3      prettyunits_1.1.1    ps_1.7.2            
##  [67] rpart_4.1.19         deldir_1.0-6         zoo_1.8-11          
##  [70] haven_2.5.1          ggrepel_0.9.2        cluster_2.1.4       
##  [73] fs_1.5.2             data.table_1.14.4    colourpicker_1.2.0  
##  [76] reprex_2.0.2         googledrive_2.0.0    mvtnorm_1.1-3       
##  [79] matrixStats_0.62.0   hms_1.1.2            shinyjs_2.1.0       
##  [82] mime_0.12            evaluate_0.18        xtable_1.8-4        
##  [85] shinystan_2.6.0      jpeg_0.1-9           readxl_1.4.1        
##  [88] gridExtra_2.3        rstantools_2.2.0     compiler_4.2.2      
##  [91] V8_4.2.2             KernSmooth_2.23-20   crayon_1.5.2        
##  [94] minqa_1.2.5          StanHeaders_2.26.13  htmltools_0.5.3     
##  [97] later_1.3.0          tzdb_0.3.0           RcppParallel_5.1.5  
## [100] lubridate_1.9.0      DBI_1.1.3            dbplyr_2.2.1        
## [103] boot_1.3-28          Matrix_1.5-1         car_3.1-1           
## [106] cli_3.4.1            parallel_4.2.2       igraph_1.3.5        
## [109] pkgconfig_2.0.3      flashClust_1.01-2    foreign_0.8-83      
## [112] xml2_1.3.3           dygraphs_1.1.1.6     bslib_0.4.1         
## [115] estimability_1.4.1   rvest_1.0.3          callr_3.7.3         
## [118] digest_0.6.30        rmarkdown_2.18       cellranger_1.1.0    
## [121] htmlTable_2.4.1      curl_4.3.3           shiny_1.7.3         
## [124] gtools_3.9.3         nloptr_2.0.3         lifecycle_1.0.3     
## [127] jsonlite_1.8.3       carData_3.0-5        fansi_1.0.3         
## [130] pillar_1.8.1         loo_2.5.1            pkgbuild_1.4.0      
## [133] fastmap_1.1.0        httr_1.4.4           glue_1.6.2          
## [136] xts_0.12.2           zip_2.2.2            png_0.1-7           
## [139] shinythemes_1.2.0    sass_0.4.2           latticeExtra_0.6-30 
## [142] caTools_1.18.2

  1. BEL, D-BSSE, ETH Zurich↩︎

  2. University Hospital Zurich and University Hospital Basel, ↩︎